This is a scRNAseq data analysis of CRC tumor infiltrating cells (heavily focused on NK and ILC1).

setwd("/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC")
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 'SeuratObject' was built under R 4.4.0 but the current version is
## 4.4.1; it is recomended that you reinstall 'SeuratObject' as the ABI
## for R may have changed
## 'SeuratObject' was built with package 'Matrix' 1.7.0 but the current
## version is 1.7.2; it is recomended that you reinstall 'SeuratObject' as
## the ABI for 'Matrix' may have changed
## 
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
## 
##     intersect, t
library(SeuratDisk)
## Registered S3 method overwritten by 'SeuratDisk':
##   method            from  
##   as.sparse.H5Group Seurat
library(ggplot2)
library(DoubletFinder)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(SeuratData)
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(ggplot2)
library(metap)
library(clusterProfiler)
## 
## clusterProfiler v4.12.6 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
## 
## Please cite:
## 
## S Xu, E Hu, Y Cai, Z Xie, X Luo, L Zhan, W Tang, Q Wang, B Liu, R Wang,
## W Xie, T Wu, L Xie, G Yu. Using clusterProfiler to characterize
## multiomics data. Nature Protocols. 2024, 19(11):3292-3320
## 
## Attaching package: 'clusterProfiler'
## 
## The following object is masked from 'package:purrr':
## 
##     simplify
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(org.Mm.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## 
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## 
## The following object is masked from 'package:SeuratObject':
## 
##     intersect
## 
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
##     tapply, union, unique, unsplit, which.max, which.min
## 
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## 
## The following object is masked from 'package:clusterProfiler':
## 
##     rename
## 
## The following objects are masked from 'package:lubridate':
## 
##     second, second<-
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## 
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## The following object is masked from 'package:utils':
## 
##     findMatches
## 
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## 
## Attaching package: 'IRanges'
## 
## The following object is masked from 'package:clusterProfiler':
## 
##     slice
## 
## The following object is masked from 'package:lubridate':
## 
##     %within%
## 
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## The following object is masked from 'package:purrr':
## 
##     reduce
## 
## The following object is masked from 'package:sp':
## 
##     %over%
## 
## 
## Attaching package: 'AnnotationDbi'
## 
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(enrichplot)

Initially I exclude sample 5, since it has insufficient data to contribute to the analysis efficiently. I will try to subsequently save it in a later chunck.

mtx_sample1 <- ReadMtx(mtx = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag01_mm/Fionda-2024-R2_SampleTag01_mm_RSEC_MolsPerCell_MEX/matrix.mtx.gz", features = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag01_mm/Fionda-2024-R2_SampleTag01_mm_RSEC_MolsPerCell_MEX/features.tsv.gz", cells = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag01_mm/Fionda-2024-R2_SampleTag01_mm_RSEC_MolsPerCell_MEX/barcodes.tsv.gz")

mtx_sample2 <- ReadMtx(mtx = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag02_mm/Fionda-2024-R2_SampleTag02_mm_RSEC_MolsPerCell_MEX/matrix.mtx.gz", features = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag02_mm/Fionda-2024-R2_SampleTag02_mm_RSEC_MolsPerCell_MEX/features.tsv.gz", cells = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag02_mm/Fionda-2024-R2_SampleTag02_mm_RSEC_MolsPerCell_MEX/barcodes.tsv.gz")

mtx_sample3 <- ReadMtx(mtx = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag03_mm/Fionda-2024-R2_SampleTag03_mm_RSEC_MolsPerCell_MEX/matrix.mtx.gz", features = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag03_mm/Fionda-2024-R2_SampleTag03_mm_RSEC_MolsPerCell_MEX/features.tsv.gz", cells = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag03_mm/Fionda-2024-R2_SampleTag03_mm_RSEC_MolsPerCell_MEX/barcodes.tsv.gz")

mtx_sample4 <- ReadMtx(mtx = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag04_mm/Fionda-2024-R2_SampleTag04_mm_RSEC_MolsPerCell_MEX/matrix.mtx.gz", features = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag04_mm/Fionda-2024-R2_SampleTag04_mm_RSEC_MolsPerCell_MEX/features.tsv.gz", cells = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag04_mm/Fionda-2024-R2_SampleTag04_mm_RSEC_MolsPerCell_MEX/barcodes.tsv.gz")

#mtx_sample5 <- ReadMtx(mtx = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag05_mm/Fionda-2024-R2_SampleTag05_mm_RSEC_MolsPerCell_MEX/matrix.mtx.gz", features = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag05_mm/Fionda-2024-R2_SampleTag05_mm_RSEC_MolsPerCell_MEX/features.tsv.gz", cells = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag05_mm/Fionda-2024-R2_SampleTag05_mm_RSEC_MolsPerCell_MEX/barcodes.tsv.gz")

mtx_sample6 <- ReadMtx(mtx = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag06_mm/Fionda-2024-R2_SampleTag06_mm_RSEC_MolsPerCell_MEX/matrix.mtx.gz", features = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag06_mm/Fionda-2024-R2_SampleTag06_mm_RSEC_MolsPerCell_MEX/features.tsv.gz", cells = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag06_mm/Fionda-2024-R2_SampleTag06_mm_RSEC_MolsPerCell_MEX/barcodes.tsv.gz")

mtx_sample7 <- ReadMtx(mtx = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag07_mm/Fionda-2024-R2_SampleTag07_mm_RSEC_MolsPerCell_MEX/matrix.mtx.gz", features = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag07_mm/Fionda-2024-R2_SampleTag07_mm_RSEC_MolsPerCell_MEX/features.tsv.gz", cells = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag07_mm/Fionda-2024-R2_SampleTag07_mm_RSEC_MolsPerCell_MEX/barcodes.tsv.gz")

mtx_sample8 <- ReadMtx(mtx = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag08_mm/Fionda-2024-R2_SampleTag08_mm_RSEC_MolsPerCell_MEX/matrix.mtx.gz", features = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag08_mm/Fionda-2024-R2_SampleTag08_mm_RSEC_MolsPerCell_MEX/features.tsv.gz", cells = "/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/mCRC_ILC/Fionda-2024-R2_SampleTag08_mm/Fionda-2024-R2_SampleTag08_mm_RSEC_MolsPerCell_MEX/barcodes.tsv.gz")

Next, I create the Seurat objects for each sample.

seurat_s1 <- CreateSeuratObject(counts = mtx_sample1, project = "Sample_1", min.cells = 3, min.features = 200)
seurat_s2 <- CreateSeuratObject(counts = mtx_sample2, project = "Sample_2", min.cells = 3, min.features = 200)
seurat_s3 <- CreateSeuratObject(counts = mtx_sample3, project = "Sample_3", min.cells = 3, min.features = 200)
seurat_s4 <- CreateSeuratObject(counts = mtx_sample4, project = "Sample_4", min.cells = 3, min.features = 200)
#seurat_s5 <- CreateSeuratObject(counts = mtx_sample5, project = "Sample_5", min.cells = 3, min.features = 200)
seurat_s6 <- CreateSeuratObject(counts = mtx_sample6, project = "Sample_6", min.cells = 3, min.features = 200)
seurat_s7 <- CreateSeuratObject(counts = mtx_sample7, project = "Sample_7", min.cells = 3, min.features = 200)
seurat_s8 <- CreateSeuratObject(counts = mtx_sample8, project = "Sample_8", min.cells = 3, min.features = 200)

Subsequently, I perform some of the steps of the Seurat analysis as described in the documentation for Seurat project (https://satijalab.org/seurat/articles/integration_introduction). The remaining steps are completed after merging the samples.

Sample 1

#Quality control
seurat_s1[["percent.mt"]] <- PercentageFeatureSet(seurat_s1, pattern = "^mt-") 
#View(seurat_s1@meta.data)

VlnPlot(seurat_s1, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat_s1, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s1, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s1, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

seurat_s1.filtered <- subset(seurat_s1, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10)

FeatureScatter(seurat_s1.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s1.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s1.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

#Normalization
s1_normalized <- NormalizeData(object = seurat_s1.filtered)
## Normalizing layer: counts
#Integration
s1_integrated <- FindVariableFeatures(object = s1_normalized)
## Finding variable features for layer counts
top10 <- head(VariableFeatures(s1_integrated), 10)
top10
##  [1] "Hspa1b"  "Gm11290" "Hspa1a"  "Ly6c2"   "Klra5"   "Il7r"    "Plac8"  
##  [8] "Stmn1"   "Cxcl10"  "Gzmb"
plot1 <- VariableFeaturePlot(s1_integrated)
plot1
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

#Scaling
s1_scaled <- ScaleData(object = s1_integrated)
## Centering and scaling data matrix
#Linear dimensionality reduction
s1_reduced <- RunPCA(object = s1_scaled) 
## PC_ 1 
## Positive:  Gzma, Prf1, Gzmb, Ccl5, Irf8, Ccl3, Ccl4, Itgam, Cma1, Lgals1 
##     Actb, Plek, Klrg1, Ncr1, Klra9, Cdc20b, Sgk1, Serpinb9, Serpinb9b, ENSMUSG00000121360 
##     Ccnd2, H1f2, Eomes, Klri2, Nr4a1, Klrb1a, Zeb2, Cx3cr1, Klra4, Klra8 
## Negative:  Emb, Ly6e, Il7r, Tcf7, Cd3e, Gpr183, Ikzf2, Trgc4, Cd3g, Ltb 
##     Inpp4b, Cd7, Cd28, Camk4, Rora, Thy1, Cxcr6, Gramd3, Slco3a1, Jaml 
##     Lyst, Cd3d, Itgb3, Ly6a, Trgc2, Ssh2, Zfp36l1, Plac8, Actn1, Tmem176b 
## PC_ 2 
## Positive:  Dhrs3, Eomes, Stat3, Bcl2l11, Vegfa, Pmepa1, Ccr2, Tgfb1, Car2, Myb 
##     Cemip2, Rnf157, Cxcr4, Tspan13, Skil, Ctla2a, Dgat1, Malt1, Fosl2, Irf8 
##     Birc3, Vps37b, Irak2, Plxnc1, Gpr132, Bach2, Cux1, Crem, Gzma, Sult2b1 
## Negative:  Cd3e, Cd3g, Ly6c2, S100a6, Actb, Cd3d, Trgc2, Cd226, Cxcr6, Trgc4 
##     Il7r, Jaml, Trgc1, Hopx, Slfn1, Ly6a, Cd160, S100a4, Ncr1, Inpp4b 
##     Slco3a1, Bcl11b, Trgv2, Actn1, Trbc2, Dpp4, Trac, Ccnd2, Trgv1, Cd8a 
## PC_ 3 
## Positive:  Pim1, Nr4a1, Nfkbiz, Nfkbia, Serpinb9, Nr4a3, Bhlhe40, Kdm6b, Ifrd1, Nfkbid 
##     Vps37b, Icam1, Vegfa, Fosl2, Dusp5, Gadd45b, Nabp1, Spry2, Irf8, Cdkn1a 
##     Nfkbie, Phlda1, Birc3, Crem, Mxd1, Nr4a2, Tnfrsf9, Serpinb6b, Rel, Gem 
## Negative:  Fos, Jun, Hspa1b, Klf2, Fosb, Hspa1a, Egr1, Dnajb1, Arl4d, Klra4 
##     Cited4, Pmepa1, Psap, Gm12185, S1pr1, 9930111J21Rik2, Ccr2, ENSMUSG00000121360, Ctla2a, Tcf7 
##     Il18r1, Klf6, Klra8, ENSMUSG00000120357, Rhob, Ifi208, Myb, Eomes, Trdc, Ifi213 
## PC_ 4 
## Positive:  Cd3e, Cd3g, Cd3d, Trgc2, Gramd3, Trgc1, Nr4a1, Dpp4, Ikzf2, Trac 
##     Vps37b, Actn1, Il7r, Nfkbiz, Trgv2, Trbc2, Nfkbid, Nsg2, Nfkbia, Itk 
##     Kdm6b, Trgc4, Bcl11b, Dusp5, Cd28, Spry2, Nr4a3, Cd5, Thy1, Trgv1 
## Negative:  Ifitm3, H2-Aa, Cd74, H2-Ab1, Lyz2, Ccl6, App, Apoe, Mmp14, Clec7a 
##     Csf1r, Spi1, Myof, Mpeg1, Ly6i, ENSMUSG00000120878, Alox5ap, Lst1, H2-Eb1, Nlrp3 
##     Ncf2, Ifi211, H2-DMb1, Mafb, Adgre1, Cd68, Tlr13, Il1b, Ms4a8a, Pla2g4a 
## PC_ 5 
## Positive:  Cd3e, Cd3g, Cd3d, Trac, Trgc2, Slfn1, Cxcr4, Dpp4, Trgc1, Pmepa1 
##     Kdm6b, Ass1, Vegfa, Fth1, Pim1, Nsg2, Cd5, Fosl2, Crem, Trgv2 
##     Nfkbid, Gramd3, Vps37b, Klra6, Cdkn1a, Nr4a2, Rgs16, S1pr1, Ifi27l2a, A330040F15Rik 
## Negative:  Tmem176b, Gm36723, Ncr1, Ckb, Klrb1b, Tmem176a, Fgl2, Dscam, Rora, S100a4 
##     Krt83, Peak1, Adgrg3, Slamf7, Coro2a, Cxcr6, Klrc1, Sema6d, AU020206, Id2 
##     St6galnac3, Cd226, Asb2, Fosb, Gm2682, Etv6, Itgax, Fos, Il18r1, Ripor2
VizDimLoadings(s1_reduced, dims = 1:2, reduction = "pca")

DimPlot(s1_reduced, reduction = "pca") + NoLegend()

DimHeatmap(s1_reduced, dims = 1:20, cells = 500, balanced = TRUE)

ElbowPlot(s1_reduced)

#Clustering
s1_neighboors <- FindNeighbors(object = s1_reduced, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
s1_clusters <- FindClusters(object = s1_neighboors)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1660
## Number of edges: 64144
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6864
## Number of communities: 7
## Elapsed time: 0 seconds
head(Idents(s1_clusters), 5) #cluster IDs 1st 5 cells
##  14310  38613  44195  93453 157761 
##      1      0      0      3      0 
## Levels: 0 1 2 3 4 5 6
#UMAP
s1_umap <- RunUMAP(object = s1_clusters, dims = 1:20)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 00:28:53 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:28:53 Read 1660 rows and found 20 numeric columns
## 00:28:53 Using Annoy for neighbor search, n_neighbors = 30
## 00:28:53 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:28:53 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1617236c471
## 00:28:53 Searching Annoy index using 1 thread, search_k = 3000
## 00:28:53 Annoy recall = 100%
## 00:28:53 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:28:54 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:28:54 Commencing optimization for 500 epochs, with 64836 positive edges
## 00:28:55 Optimization finished
DimPlot(s1_umap, reduction = "umap")

#Doublet finder
#pK identification (no ground-truth)
sweep.res.list_s1 <- paramSweep(s1_umap, PCs = 1:20, sct = FALSE)
## Loading required package: fields
## Loading required package: spam
## Spam version 2.11-1 (2025-01-20) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction 
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
## 
## Attaching package: 'spam'
## 
## The following object is masked from 'package:stats4':
## 
##     mle
## 
## The following objects are masked from 'package:base':
## 
##     backsolve, forwardsolve
## 
## Loading required package: viridisLite
## 
## Try help(fields) to get started.
## Loading required package: parallel
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_s1 <- summarizeSweep(sweep.res.list_s1, GT = FALSE)
## Loading required package: KernSmooth
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## Loading required package: ROCR
bcmvn_s1 <- find.pK(sweep.stats_s1)

## NULL
ggplot(bcmvn_s1, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()

#0.29 max
pK <- bcmvn_s1[bcmvn_s1$BCmetric == max(bcmvn_s1$BCmetric), "pK"]
pK <- as.numeric(as.character(pK)) 
pK
## [1] 0.29
#Homotypic doublet proportion estimate
annotations <- s1_umap@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.05*nrow(s1_umap@meta.data)) ##in project description
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
nExp_poi.adj #expected number of doublets
## [1] 67
#DoubletFinder
s1_finalized <- doubletFinder(s1_umap,
                              PCs = 1:20,
                              pK = pK,
                              nExp = nExp_poi.adj,
                              reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 553 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(s1_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.29_67")

table(s1_finalized@meta.data$DF.classifications_0.25_0.29_67)
## 
## Doublet Singlet 
##      67    1593
s1_finalized <- subset(s1_finalized, subset = DF.classifications_0.25_0.29_67 == "Singlet")
table(s1_finalized@meta.data$DF.classifications_0.25_0.29_67)
## 
## Singlet 
##    1593
DimPlot(s1_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.29_67")

Sample 2

#Quality control
seurat_s2[["percent.mt"]] <- PercentageFeatureSet(seurat_s2, pattern = "^mt-") 
#View(seurat_s2@meta.data)

VlnPlot(seurat_s2, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat_s2, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s2, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s2, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

seurat_s2.filtered <- subset(seurat_s2, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10)

FeatureScatter(seurat_s2.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s2.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s2.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

#Normalization
s2_normalized <- NormalizeData(object = seurat_s2.filtered)
## Normalizing layer: counts
#Integration
s2_integrated <- FindVariableFeatures(object = s2_normalized)
## Finding variable features for layer counts
top10 <- head(VariableFeatures(s2_integrated), 10)
top10
##  [1] "Hspa1b"  "Trac"    "Gm11290" "Il7r"    "Klra5"   "Plac8"   "Gzmb"   
##  [8] "Hspa1a"  "Ccl4"    "Ccl3"
plot1 <- VariableFeaturePlot(s2_integrated)
plot1
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

#Scaling
s2_scaled <- ScaleData(object = s2_integrated)
## Centering and scaling data matrix
#Linear dimensionality reduction
s2_reduced <- RunPCA(object = s2_scaled) 
## PC_ 1 
## Positive:  Emb, Il7r, Camk4, Ly6e, Gpr183, Tcf7, Cd3e, Inpp4b, Itgb3, Gramd3 
##     Cxcr6, Lyst, Ikzf2, Ltb, Gna13, Cd3g, Rora, Gpr132, Cd3d, Cd28 
##     Xcl1, Zfp36l1, Trac, Odc1, Plac8, Smad7, Lilrb4b, Trgc4, Zc3hav1, Ly6a 
## Negative:  Gzma, Prf1, Ccl5, Gzmb, Ccl3, Ccl4, Irf8, Itgam, Lgals1, Cma1 
##     Actb, S1pr5, Plek, Klrg1, Klra9, Ncr1, Zeb2, Cdc20b, Sgk1, Klf2 
##     ENSMUSG00000121360, Klri2, H1f2, Itga2, Ccnd2, Klra4, Serpinb9b, Rap1gap2, Ifng, Eomes 
## PC_ 2 
## Positive:  Eomes, Dhrs3, Myb, Stat3, Ctla2a, Slc26a11, Satb1, Plxnc1, Pmepa1, Ccr2 
##     Rnf157, Cemip2, Tgfb1, Cenpa, Tpd52, Klra4, Sft2d2, Bach2, Tcf7, Slc3a2 
##     Car2, Vegfa, Gstm1, Ssh2, Smad7, Klra8, Cxcr4, Ust, Mideas, Sult2b1 
## Negative:  S100a6, S100a4, Cd52, Actb, Ly6a, Cd226, Inpp4b, Cxcr6, Ly6c2, Adam8 
##     Cd74, Ifitm2, Ncr1, Il7r, Lgals1, Ccnd2, Cd3e, Vim, Fgl2, Clec7a 
##     Asb2, Cd3g, Tmem176b, AA467197, Trgc4, H2-Ab1, Lyz2, Capg, Cxcl2, Sgms1 
## PC_ 3 
## Positive:  Clec4e, Lyz2, Tgfbi, Tnfaip2, Nlrp3, Cxcl2, Il1b, Clec4d, Clec4n, Clec7a 
##     Csf2rb, Ptgs2, Cd74, Pirb, Vegfa, Lilra6, C5ar1, Ptafr, Inhba, Basp1 
##     Pstpip2, Spi1, AA467197, Csf2ra, Hcar2, Ifitm2, Cd300a, Nfam1, Ccr1, Haao 
## Negative:  Cd3e, Cd3g, Cd3d, Trac, Trgc2, Trgc1, Bcl11b, Trgv2, Trbc2, Dpp4 
##     Trgc4, Actn1, Trgv1, Nsg2, Ikzf2, Il7r, Cd5, Cd8a, Inpp4b, Trat1 
##     Dtx1, Lat, Cdh1, Cd226, Cxcr6, P2rx7, Grap2, Cd160, Adam19, Tnfaip8 
## PC_ 4 
## Positive:  Cd3e, Cd3d, Cd3g, Pmepa1, Trgc2, Dpp4, Nsg2, Trac, Fth1, Trgc1 
##     Clec4e, S1pr1, Dtx4, Nt5e, Atp1a3, Clec7a, Clk2, Trgv2, Lyz2, ENSMUSG00000121069 
##     Pygl, Clec4d, Clec4n, Csf2rb, Actn1, Cxcl2, Il1b, Klra6, Rgs10, Pstpip2 
## Negative:  Serpinb9, Spry2, Nabp1, Il12rb2, Dusp5, Irf8, Nr4a1, Ncr1, Dennd4a, Serpinb6b 
##     Plek, Nfkbiz, Prf1, Nr4a3, Ccnd2, Gm36723, Tmem176a, P2ry10, Pim1, Nfkbia 
##     Fgl2, Actb, Vegfa, Cd52, Ifrd1, Lilrb4b, Tmem176b, Mxd1, Nr4a2, Arsb 
## PC_ 5 
## Positive:  Tmem176a, Tmem176b, Klrb1b, Ckb, Vegfa, Krt83, Prnp, Il2ra, Rora, ENSMUSG00000121239 
##     Cxcr6, Dscam, Dgat1, Acpp, Gm36723, Fth1, Adgrg3, Hoxa5, Trgc4, Ltb 
##     Itgb3, Furin, Ccdc184, Septin8, Sdc4, Il7r, Scn1b, Nrgn, Hilpda, Serpina3g 
## Negative:  Utrn, Fos, Ifi208, Ifit1, Fosb, Rtp4, Ifit2, Ifit3, Rsad2, Ifi206 
##     Mx1, Cblb, Ifi209, Cd3e, Klf6, Slfn8, Ifit1bl1, Cd69, Ripor2, Haao 
##     Klra7, Klra6, Stat1, Phf21a, Samhd1, Ifi213, Elmo1, Isg15, Cmpk2, Cmah
VizDimLoadings(s2_reduced, dims = 1:2, reduction = "pca")

DimPlot(s2_reduced, reduction = "pca") + NoLegend()

DimHeatmap(s2_reduced, dims = 1:20, cells = 500, balanced = TRUE)

ElbowPlot(s2_reduced)

#Clustering
s2_neighboors <- FindNeighbors(object = s2_reduced, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
s2_clusters <- FindClusters(object = s2_neighboors)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 916
## Number of edges: 37290
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6433
## Number of communities: 5
## Elapsed time: 0 seconds
head(Idents(s2_clusters), 5) #cluster IDs 1st 5 cells
## 176454 252066 285663 298134 298967 
##      1      0      2      0      1 
## Levels: 0 1 2 3 4
#UMAP
s2_umap <- RunUMAP(object = s2_clusters, dims = 1:20)
## 00:29:18 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:29:18 Read 916 rows and found 20 numeric columns
## 00:29:18 Using Annoy for neighbor search, n_neighbors = 30
## 00:29:18 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:29:18 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1613f7c67a6
## 00:29:18 Searching Annoy index using 1 thread, search_k = 3000
## 00:29:18 Annoy recall = 100%
## 00:29:19 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:29:19 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:29:19 Commencing optimization for 500 epochs, with 35074 positive edges
## 00:29:20 Optimization finished
DimPlot(s2_umap, reduction = "umap")

#Doublet finder
#pK identification (no ground-truth)
sweep.res.list_s2 <- paramSweep(s2_umap, PCs = 1:20, sct = FALSE)
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_s2 <- summarizeSweep(sweep.res.list_s2, GT = FALSE)
bcmvn_s2 <- find.pK(sweep.stats_s2)

## NULL
ggplot(bcmvn_s2, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()

#0.11 max
pK <- bcmvn_s2[bcmvn_s2$BCmetric == max(bcmvn_s2$BCmetric), "pK"]
pK <- as.numeric(as.character(pK)) 
pK
## [1] 0.11
#Homotypic doublet proportion estimate
annotations <- s2_umap@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.05*nrow(s2_umap@meta.data)) ##in project description
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
nExp_poi.adj #expected number of doublets
## [1] 34
#DoubletFinder
s2_finalized <- doubletFinder(s2_umap,
                              PCs = 1:20,
                              pK = pK,
                              nExp = nExp_poi.adj,
                              reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 305 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(s2_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.11_34")

table(s2_finalized@meta.data$DF.classifications_0.25_0.11_34)
## 
## Doublet Singlet 
##      34     882
s2_finalized <- subset(s2_finalized, subset = DF.classifications_0.25_0.11_34 == "Singlet")
table(s2_finalized@meta.data$DF.classifications_0.25_0.11_34)
## 
## Singlet 
##     882
DimPlot(s2_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.11_34")

Sample 3

#Quality control
seurat_s3[["percent.mt"]] <- PercentageFeatureSet(seurat_s3, pattern = "^mt-") 
#View(seurat_s3@meta.data)

VlnPlot(seurat_s3, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat_s3, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s3, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s3, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

seurat_s3.filtered <- subset(seurat_s3, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10)

FeatureScatter(seurat_s3.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s3.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s3.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

#Normalization
s3_normalized <- NormalizeData(object = seurat_s3.filtered)
## Normalizing layer: counts
#Integration
s3_integrated <- FindVariableFeatures(object = s3_normalized)
## Finding variable features for layer counts
top10 <- head(VariableFeatures(s3_integrated), 10)
top10
##  [1] "Hspa1b"    "Gm11290"   "Il7r"      "Klra5"     "Ccl3"      "Ccl9"     
##  [7] "Ly6c2"     "Hspa1a"    "Serpinb9b" "Gzmb"
plot1 <- VariableFeaturePlot(s3_integrated)
plot1

plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2

#Scaling
s3_scaled <- ScaleData(object = s3_integrated)
## Centering and scaling data matrix
#Linear dimensionality reduction
s3_reduced <- RunPCA(object = s3_scaled) 
## PC_ 1 
## Positive:  Emb, Il7r, Ly6e, Tcf7, Gpr183, Cd3e, Trgc4, Camk4, Ikzf2, Gpr132 
##     Itgb3, Lyst, Zfp36l1, Inpp4b, Gna13, Plac8, Cd3d, Trbc2, Gramd3, Cd28 
##     Peli1, Tmem176a, Icos, Cd3g, Irak2, Ltb, Rora, Retreg1, Smad7, Cxcr6 
## Negative:  Gzma, Prf1, Gzmb, Ccl3, Ccl4, Ccl5, Irf8, Itgam, Lgals1, Cma1 
##     Actb, Plek, Itgb2, S1pr5, Klrg1, Ncr1, Klra9, Cdc20b, Serpinb9, Sgk1 
##     Ccnd2, H1f2, Klra8, Zeb2, Dusp2, Serpinb9b, Klrb1a, Klf2, Il12rb2, ENSMUSG00000121360 
## PC_ 2 
## Positive:  Satb1, Ctla2a, Pmepa1, Slc26a11, Bcl2l11, Tcf7, Ssh2, Irak2, Cemip2, Myb 
##     Smad7, Peli1, Fryl, Eomes, Plxnc1, Cblb, Car2, Cxcr4, Tgfb1, Emb 
##     Sult2b1, Prkch, Tnrc6b, Ipcef1, Mideas, Tpd52, Klra4, S1pr1, Dennd4a, ENSMUSG00000121360 
## Negative:  Tgm2, Ifitm2, Alox5ap, Cd74, Ifitm3, Gpr35, Gatm, Il1b, Ccr1, Pirb 
##     App, Sirpa, Mpeg1, Ly6a, Ifi30, Ly86, AA467197, ENSMUSG00000120878, Ms4a6d, Tgfbi 
##     Plbd1, Basp1, Gm19765, Clec7a, Csf2ra, Arg1, Adam8, Ifi211, Ier3, Ifitm1 
## PC_ 3 
## Positive:  Trgc4, Il7r, Inpp4b, Cd160, Cd3e, Ly6c2, Cd226, Cxcr6, Cd3g, mt-Nd4 
##     Tmem176b, Ncr1, Cd3d, Hopx, S100a6, Trac, Cd52, Asb2, Tmem176a, Thy1 
##     Klrc1, Actb, Gpr183, Il2ra, Itgb2, Bcl11b, Trgc1, Camk4, Gpr34, Trgv1 
## Negative:  Eomes, Tgfb1, Cemip2, Stat3, Pmepa1, Tpd52, Plxnc1, Klra4, Vegfa, Myb 
##     Bcl2l11, Tspan13, Gzma, Ccr2, Fryl, ENSMUSG00000121360, Ctla2a, Irf8, Fth1, Klra8 
##     Fosl2, Klri2, Ndrg1, Cux1, Sult2b1, Smad7, Plac8, Cdkn1a, Tiparp, Malt1 
## PC_ 4 
## Positive:  Pmepa1, Fth1, Cd3e, Aldoa, Cd3d, Osgin1, Cd3g, Cd8a, Trac, Rgs10 
##     Ifi27l2a, Cd6, S1pr1, Plcg1, Tspan13, Aim2, Dzank1, Cd5, Cd8b1, Gm8369 
##     Trgv2, Ass1, Actn1, Rgs16, Spsb1, Havcr2, Trgc2, Ctla2a, Nt5e, 2210408F21Rik 
## Negative:  Ncr1, Spry2, Plek, Arsb, Ripor2, mt-Nd4, Gm2682, Nr4a1, Dennd4a, Pitpnc1 
##     Itga2, Aoah, Dock8, Slc9a9, Celf2, Serpinb9, Fosb, Dock10, Elmo1, Atp8b4 
##     Itgax, Ccr5, Zeb2, Ifi203, Lpp, Nfkbiz, Vim, Itgb2, Gem, Actb 
## PC_ 5 
## Positive:  Klf2, Fos, Jun, Cd3e, Cd3g, ENSMUSG00000121360, Klra4, Cmah, S1pr5, Cd3d 
##     Kif13b, Ncoa1, Wdr49, Zfp182, Tex9, Carns1, Tnik, Specc1, Rfx3, 9930111J21Rik2 
##     Prkch, Gm2682, Gpatch3, 4933406I18Rik, Zfp808, Hspa1b, Klra8, Kif16b, Ipcef1, Pvt1 
## Negative:  Vegfa, Fosl2, Dgat1, Serpinb9, Ifrd1, Pim1, Nfkbia, Nr4a3, Actg1, Nr4a1 
##     Kdm6b, P2ry10, Serpinb6b, Nfkbiz, Tnfrsf9, Crem, Mxd1, Irf8, Nudt4, Birc3 
##     Xcl1, Gpr132, Hilpda, Nfkbid, Icam1, Spry2, Cdkn1a, Cd52, Klrb1b, Car2
VizDimLoadings(s3_reduced, dims = 1:2, reduction = "pca")

DimPlot(s3_reduced, reduction = "pca") + NoLegend()

DimHeatmap(s3_reduced, dims = 1:20, cells = 500, balanced = TRUE)

ElbowPlot(s3_reduced)

#Clustering
s3_neighboors <- FindNeighbors(object = s3_reduced, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
s3_clusters <- FindClusters(object = s3_neighboors)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 850
## Number of edges: 35278
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6440
## Number of communities: 5
## Elapsed time: 0 seconds
head(Idents(s3_clusters), 5) #cluster IDs 1st 5 cells
##  12486  86980 203100 218885 227873 
##      0      2      3      2      0 
## Levels: 0 1 2 3 4
#UMAP
s3_umap <- RunUMAP(object = s3_clusters, dims = 1:20)
## 00:29:37 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:29:37 Read 850 rows and found 20 numeric columns
## 00:29:37 Using Annoy for neighbor search, n_neighbors = 30
## 00:29:37 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:29:37 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1616db9cb09
## 00:29:37 Searching Annoy index using 1 thread, search_k = 3000
## 00:29:37 Annoy recall = 100%
## 00:29:38 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:29:38 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:29:38 Commencing optimization for 500 epochs, with 32470 positive edges
## 00:29:39 Optimization finished
DimPlot(s3_umap, reduction = "umap")

#Doublet finder
#pK identification (no ground-truth)
sweep.res.list_s3 <- paramSweep(s3_umap, PCs = 1:20, sct = FALSE)
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_s3 <- summarizeSweep(sweep.res.list_s3, GT = FALSE)
bcmvn_s3 <- find.pK(sweep.stats_s3)

## NULL
ggplot(bcmvn_s3, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()

#0.25 max
pK <- bcmvn_s3[bcmvn_s3$BCmetric == max(bcmvn_s3$BCmetric), "pK"]
pK <- as.numeric(as.character(pK))
pK
## [1] 0.25
#Homotypic doublet proportion estimate
annotations <- s3_umap@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.05*nrow(s3_umap@meta.data)) ##in project description
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
nExp_poi.adj #expected number of doublets
## [1] 31
#DoubletFinder
s3_finalized <- doubletFinder(s3_umap,
                              PCs = 1:20,
                              pK = pK,
                              nExp = nExp_poi.adj,
                              reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 283 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(s3_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.25_31")

table(s3_finalized@meta.data$DF.classifications_0.25_0.25_31)
## 
## Doublet Singlet 
##      31     819
s3_finalized <- subset(s3_finalized, subset = DF.classifications_0.25_0.25_31 == "Singlet")
table(s3_finalized@meta.data$DF.classifications_0.25_0.25_31)
## 
## Singlet 
##     819
DimPlot(s3_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.25_31")

Sample 4

#Quality control
seurat_s4[["percent.mt"]] <- PercentageFeatureSet(seurat_s4, pattern = "^mt-") 
#View(seurat_s4@meta.data)

VlnPlot(seurat_s4, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat_s4, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s4, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s4, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

seurat_s4.filtered <- subset(seurat_s4, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10)

FeatureScatter(seurat_s4.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s4.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s4.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

#Normalization
s4_normalized <- NormalizeData(object = seurat_s4.filtered)
## Normalizing layer: counts
#Integration
s4_integrated <- FindVariableFeatures(object = s4_normalized)
## Finding variable features for layer counts
top10 <- head(VariableFeatures(s4_integrated), 10)
top10
##  [1] "Gm11290"    "Ly6c2"      "Ccl3"       "Ccl4"       "Klra8"     
##  [6] "Xcl1"       "Il7r"       "St6galnac3" "Trac"       "Inpp4b"
plot1 <- VariableFeaturePlot(s4_integrated)
plot1

plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2

#Scaling
s4_scaled <- ScaleData(object = s4_integrated)
## Centering and scaling data matrix
#Linear dimensionality reduction
s4_reduced <- RunPCA(object = s4_scaled) 
## PC_ 1 
## Positive:  Gzma, Prf1, Ccl5, Irf8, Gzmb, Itgam, Eomes, Sell, Ccl3, ENSMUSG00000121360 
##     Cma1, Klf2, Klri2, ENSMUSG00000121359, Klra4, Zeb2, Actg1, Ccnd2, Ccl4, Klra8 
##     S1pr5, Emp3, Actb, Klrg1, Lgals1, Klra9, Itga2, Plek, Reep5, Klra7 
## Negative:  Il7r, Ly6e, Gpr183, Inpp4b, Cxcr6, Rora, Itga1, Camk4, Cd3g, Thy1 
##     Cd3e, Icos, Emb, Trac, Cd226, Cd3d, Cd28, Odc1, Gramd3, Ly6a 
##     Rbpj, Rhoh, Trgc4, Zfp36l1, Cd5, Trbc2, Tcf7, Cd8b1, Itgb3, Slc4a7 
## PC_ 2 
## Positive:  Cd86, Slamf1, Adam19, Cd8b1, Ly6a, Cd8a, Themis, Pomt2, 4930469K13Rik, Fam169b 
##     Il1r2, Ifi27l2a, Cd5, Cbx8, Cep170b, Cd3e, Snhg17, Slfn1, Gm43329, Havcr2 
##     Ptprs, Pdcd1, Cxcr6, Trat1, Parp3, Invs, Dis3, Ephx1, Trac, Cd3g 
## Negative:  Tcf7, Cd7, Car2, Trgc4, Pcgf5, Zfp36l2, Epb41, Clcn3, Immp2l, Dscam 
##     Ern1, ENSMUSG00000120992, Kit, Mideas, Tmem176b, Xcl1, Scn1b, Mpp7, Cxcr4, Abca1 
##     Map3k14, Stat4, Raph1, Il7r, Klrb1b, Itgb3, Fam172a, Rftn1, Emb, Ttc14 
## PC_ 3 
## Positive:  Bcl2l11, Myb, Slc26a11, Mau2, Cd27, Klhl9, Pmepa1, Tango6, Bend4, Ccr5 
##     Stoml1, Sell, Mxi1, Ube2d1, Cux1, Car2, Spock2, Cxcr4, Ifi27l2a, Vegfa 
##     Pgm3, Thada, Stat3, Nufip1, Cmah, Rps20, Fancl, Ptprs, Rttn, Pla2g6 
## Negative:  Gm36723, S100a6, Klrb1b, Klrg1, S100a4, Gpr34, Kansl1, Vim, Cep162, Rgs1 
##     Gmeb1, Tmem176b, Itga1, Zmym1, Sema6d, Ly6a2, Plek, Septin11, Rora, Bcl2a1d 
##     Capg, Cd226, Scn1b, Riok2, Nedd9, Aopep, H1f0, Immp2l, Nebl, Cd200r1 
## PC_ 4 
## Positive:  ENSMUSG00000120570, Gm43462, Snx16, Cdc14a, Mblac2, Hook1, Osbpl8, Ripor3, Vti1a, Stx2 
##     Phldb2, Gm2682, Tex9, Trps1, Ddx60, Ube2t, Plcxd2, Sidt1, Aff3, Fry 
##     Rfx3, Zfp64, Ubash3b, Rpf2, 4932438A13Rik, Cd101, Fbxo31, Dnaaf9, Scml4, Rad17 
## Negative:  Irak2, Rab4a, Serpinb1a, Itgb3, Prrt1, Zfyve16, Prnp, Icos, Cd6, Ass1 
##     Zfp808, Fam120c, Ar, Gzmc, Cep162, Cd200r1, Cxcr6, Map2k4, AA467197, Atat1 
##     Dse, Osgin1, Pdcd1, Tmem87b, Senp7, Akr1b10, Arrdc4, Brd9, P2ry10, Mypop 
## PC_ 5 
## Positive:  Slamf6, Ctla2a, Cd3d, Dpp4, Rps29, Irf2bp2, Trbc2, Gramd3, Rpl31, Ly6c2 
##     Nanp, Cep170, Camk4, Eif4a1, Amz2, Bcl11b, Trgc2, Klra1, Ddx60, Pign 
##     Rps20, Trgv2, Cd3e, H2-Ab1, Galnt12, Kbtbd11, Thy1, Rps26, Polb, Klhdc2 
## Negative:  Slc25a13, A430035B10Rik, Mast2, Exoc1, Uqcc1, Immp2l, Rgs9, 4930469K13Rik, ENSMUSG00000121069, Fbxo31 
##     Gm10033, Il1r2, Atf6, Atp8a2, Cbx8, Igf1r, Dbr1, Ugdh, Polr1e, Gm43329 
##     Cd86, Ttc27, Atrx, Ckap5, Tmem176b, Garin1a, Casp3, Tango6, Ccl25, Pomt2
VizDimLoadings(s4_reduced, dims = 1:2, reduction = "pca")

DimPlot(s4_reduced, reduction = "pca") + NoLegend()

DimHeatmap(s4_reduced, dims = 1:20, cells = 500, balanced = TRUE)
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.

ElbowPlot(s4_reduced)

#Clustering
s4_neighboors <- FindNeighbors(object = s4_reduced, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
s4_clusters <- FindClusters(object = s4_neighboors)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 128
## Number of edges: 6474
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.3126
## Number of communities: 2
## Elapsed time: 0 seconds
head(Idents(s4_clusters), 5) #cluster IDs 1st 5 cells
##  199026  301443  555700  677946 1052808 
##       1       0       1       0       1 
## Levels: 0 1
#UMAP
s4_umap <- RunUMAP(object = s4_clusters, dims = 1:20)
## 00:29:54 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:29:54 Read 128 rows and found 20 numeric columns
## 00:29:54 Using Annoy for neighbor search, n_neighbors = 30
## 00:29:54 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:29:54 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1612d3ef2b4
## 00:29:54 Searching Annoy index using 1 thread, search_k = 3000
## 00:29:54 Annoy recall = 100%
## 00:29:54 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:29:55 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:29:55 Commencing optimization for 500 epochs, with 4328 positive edges
## 00:29:55 Optimization finished
DimPlot(s4_umap, reduction = "umap")

#Doublet finder
#pK identification (no ground-truth)
sweep.res.list_s4 <- paramSweep(s4_umap, PCs = 1:20, sct = FALSE)
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_s4 <- summarizeSweep(sweep.res.list_s4, GT = FALSE)
bcmvn_s4 <- find.pK(sweep.stats_s4)

## NULL
ggplot(bcmvn_s4, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()

#0.3 max
pK <- bcmvn_s4[bcmvn_s4$BCmetric == max(bcmvn_s4$BCmetric), "pK"]
pK <- as.numeric(as.character(pK))
pK
## [1] 0.3
#Homotypic doublet proportion estimate
annotations <- s4_umap@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.05*nrow(s4_umap@meta.data)) ##in project description
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
nExp_poi.adj #expected number of doublets
## [1] 3
#DoubletFinder
s4_finalized <- doubletFinder(s4_umap,
                              PCs = 1:20,
                              pK = pK,
                              nExp = nExp_poi.adj,
                              reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 43 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(s4_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.3_3")

table(s4_finalized@meta.data$DF.classifications_0.25_0.3_3)
## 
## Doublet Singlet 
##       3     125
s4_finalized <- subset(s4_finalized, subset = DF.classifications_0.25_0.3_3 == "Singlet")
table(s4_finalized@meta.data$DF.classifications_0.25_0.3_3)
## 
## Singlet 
##     125
DimPlot(s4_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.3_3")

Sample 5 (left for later)

#Quality control
#seurat_s5[["percent.mt"]] <- PercentageFeatureSet(seurat_s5, pattern = "^mt-") 
#View(seurat_s5@meta.data)

#VlnPlot(seurat_s5, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

#FeatureScatter(seurat_s5, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
#FeatureScatter(seurat_s5, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
#FeatureScatter(seurat_s5, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 

#seurat_s5.filtered <- subset(seurat_s5, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10) #maybe lower percent.mt?

#FeatureScatter(seurat_s5.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
#FeatureScatter(seurat_s5.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
#FeatureScatter(seurat_s5.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')

#VlnPlot(seurat_s5.filtered, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

#Normalization
#s5_normalized <- NormalizeData(object = seurat_s5.filtered)

#Integration
#s5_integrated <- FindVariableFeatures(object = s5_normalized)
#top10 <- head(VariableFeatures(s5_integrated), 10)
#top10
#plot1 <- VariableFeaturePlot(s5_integrated)
#plot1
#plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
#plot2

#Scaling
#s5_scaled <- ScaleData(object = s5_integrated)

#Linear dimensionality reduction
#s5_reduced <- RunPCA(object = s5_scaled, npcs = 10) 
#VizDimLoadings(s5_reduced, dims = 1:2, reduction = "pca")
#DimPlot(s5_reduced, reduction = "pca") + NoLegend()
#DimHeatmap(s5_reduced, dims = 1:10, cells = 500, balanced = TRUE)
#ElbowPlot(s5_reduced)

#Clustering
#s5_neighboors <- FindNeighbors(object = s5_reduced, dims = 1:10)
#s5_clusters <- FindClusters(object = s5_neighboors)
#head(Idents(s5_clusters), 5) #cluster IDs 1st 5 cells

#UMAP
#s5_umap <- RunUMAP(object = s5_clusters, dims = 1:10, n.neighbors = 5)
#DimPlot(s5_umap, reduction = "umap")

#Doublet finder; UNAVAILABLE FOR THIS SAMPLE; RUN DOWNSTREAM ANALYSIS WITH AND WITHOUT S5 TO SEE THE EFFECT
#pK identification (no ground-truth)
#sweep.res.list_s5 <- paramSweep(s5_umap, PCs = 1:10, sct = FALSE) #very low cell count, gives error 
#sweep.stats_s5 <- summarizeSweep(sweep.res.list_s5, GT = FALSE)
#bcmvn_s5 <- find.pK(sweep.stats_s5)

#ggplot(bcmvn_s5, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()
#0.3 max
#pK <- bcmvn_s5[bcmvn_s4$BCmetric == max(bcmvn_s5$BCmetric), "pK"]
#pK <- as.numeric(as.character(pK))
#pK

#Homotypic doublet proportion estimate
#annotations <- s5_umap@meta.data$seurat_clusters
#homotypic.prop <- modelHomotypic(annotations)
#nExp_poi <- round(0.05*nrow(s5_umap@meta.data)) ##in project description
#nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
#nExp_poi.adj #expected number of doublets

#DoubletFinder
#s5_finalized <- doubletFinder(s5_umap,
                              #PCs = 1:20,
                              #pK = pK,
                              #nExp = nExp_poi.adj,
                              #reuse.pANN = FALSE, sct = FALSE)
#DimPlot(s5_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.3_3")
#table(s5_finalized@meta.data$DF.classifications_0.25_0.3_3)

#s5_finalized <- subset(s5_finalized, subset = DF.classifications_0.25_0.3_3 == "Singlet")
#table(s5_finalized@meta.data$DF.classifications_0.25_0.3_3)
#DimPlot(s5_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.3_3")

Sample 6

#Quality control
seurat_s6[["percent.mt"]] <- PercentageFeatureSet(seurat_s6, pattern = "^mt-") 
#View(seurat_s6@meta.data)

VlnPlot(seurat_s6, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat_s6, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s6, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s6, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

seurat_s6.filtered <- subset(seurat_s6, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10)

FeatureScatter(seurat_s6.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s6.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s6.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

VlnPlot(seurat_s6.filtered, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

#Normalization
s6_normalized <- NormalizeData(object = seurat_s6.filtered)
## Normalizing layer: counts
#Integration
s6_integrated <- FindVariableFeatures(object = s6_normalized)
## Finding variable features for layer counts
top10 <- head(VariableFeatures(s6_integrated), 10)
top10
##  [1] "Klra5"   "Gm11290" "Il7r"    "Ly6c2"   "Fth1"    "Cxcl10"  "Hspa1b" 
##  [8] "Ccl9"    "Plac8"   "Trac"
plot1 <- VariableFeaturePlot(s6_integrated)
plot1
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

#Scaling
s6_scaled <- ScaleData(object = s6_integrated)
## Centering and scaling data matrix
#Linear dimensionality reduction
s6_reduced <- RunPCA(object = s6_scaled) 
## PC_ 1 
## Positive:  Emb, Il7r, Ly6e, Inpp4b, Gpr183, Cxcr6, Tmem176b, Rora, Ltb, Trgc4 
##     Itgb3, Tcf7, Tmem176a, Ly6a, Ikzf2, Itga1, Zfp36l1, Plac8, Lyst, Slco3a1 
##     Cd28, Ckb, Prnp, Xcl1, 2900026A02Rik, Cd7, Tnfsf10, Lilrb4b, Odc1, Icos 
## Negative:  Gzma, Ccl5, Gzmb, Prf1, Ccl4, Ccl3, Itgam, Lgals1, Irf8, Actb 
##     Cma1, Klrg1, S1pr5, ENSMUSG00000121359, ENSMUSG00000121360, Cdc20b, Plek, Itga2, Klra9, Ccnd2 
##     Klri2, Zeb2, Eomes, Klra4, Klra8, Rap1gap2, Serpinb9b, Klf2, H1f2, Actg1 
## PC_ 2 
## Positive:  Mpeg1, Fcgr4, Lyz2, Cd40, Csf1r, Slc15a3, Ifitm3, Slc11a1, Ly86, Wfdc17 
##     Clec7a, Ms4a8a, Bst1, Mgst1, Marcks, Il1b, Cybb, Gm21188, Tnfaip2, Ifitm2 
##     Gda, Clec4e, Il1rn, Ms4a6d, Ccr1, Cd86, Klra2, Cd74, Ifi30, Alox5ap 
## Negative:  Tcf7, Cd7, Ssh2, Emb, Ctla2a, Smad7, Slc26a11, Cblb, Dennd4a, Xcl1 
##     Fosl2, Ipcef1, P2ry10, Cd28, Zbtb20, Mideas, Lyst, Clcn3, Itgb3, Irak2 
##     Ern1, Gramd3, Tcf12, Skap1, Il18r1, Arhgap15, Sidt1, Slco3a1, Jmy, Il7r 
## PC_ 3 
## Positive:  Inpp4b, Cxcr6, Cd226, S100a6, Fgl2, Ly6c2, Actb, Hopx, Cd3g, Ly6a 
##     S100a4, Il7r, Cd3e, Trgc4, Lgals1, Gm36723, Ccnd2, Tmem176b, Itga1, Cd160 
##     Klrc1, Tmem176a, Gstp3, Cd3d, Il2ra, Trgc2, Klrg1, Jaml, Gpr183, Cx3cr1 
## Negative:  Eomes, Myb, Bcl2l11, Ccr2, Dhrs3, Tgfb1, Pmepa1, Stat3, Klra4, Cxcr4 
##     Ctla2a, Tpd52, Cux1, Nrarp, Vegfa, Plxnc1, Slc26a11, Rnf157, Sult2b1, Cemip2 
##     Ndrg1, Gstm1, ENSMUSG00000121360, Satb1, Car2, Tcf7, Ret, Klra8, Mideas, Smad7 
## PC_ 4 
## Positive:  Cd3e, Cd3d, Cd3g, Nsg2, Trac, Trgc2, Trgc1, Trgv2, S1pr1, Bcl11b 
##     Rps20, E2f1, Tubb5, Klra6, Klra7, Tcf7, Fos, Klra1, Actn1, Efcab11 
##     Smco4, Pclaf, Rgs10, Themis, Cdca2, Rps26, Atad2, Clspn, Cd8a, Slc26a11 
## Negative:  Serpinb9, Pim1, Nfkbiz, Nr4a1, Irf8, Nabp1, Vegfa, Nr4a3, Icam1, Dgat1 
##     Tmem176a, Gem, Serpinb6b, Il12rb2, Rora, Tmem176b, Ifrd1, Lilrb4b, Klrb1b, Spry2 
##     Fosl2, Kdm6b, Prf1, Nfkbid, Dennd4a, Ccl4, Plek, Gadd45b, Crem, Il2ra 
## PC_ 5 
## Positive:  Dusp2, Cd3e, Cd3d, Trgc1, Cxcr4, Smad7, S1pr1, ENSMUSG00000121069, Nsg2, Cd3g 
##     Il7r, Gramd3, Pmepa1, Trac, Trgc2, Jun, Tcf7, Ly6c2, Klf2, Bcl11b 
##     Thada, Zfp36l1, Cd69, Tnfaip8, C1qb, Ipcef1, Trgv2, Rgs1, Gpr155, Ifngas1 
## Negative:  E2f1, Mki67, Uhrf1, Pclaf, Tcf19, E2f2, Kif15, Clspn, Stmn1, Top2a 
##     Birc5, Ccna2, Ckap2, Rad51, Ncaph, Cdc6, Aspm, Cenpk, Tpx2, Rrm2 
##     Ncapg2, Ncapg, Bub1, Cdca2, Fbxo5, Bub1b, Spc24, Cenpe, Cdk1, Knl1
VizDimLoadings(s6_reduced, dims = 1:2, reduction = "pca")

DimPlot(s6_reduced, reduction = "pca") + NoLegend()

DimHeatmap(s6_reduced, dims = 1:20, cells = 500, balanced = TRUE)

ElbowPlot(s6_reduced)

#Clustering
s6_neighboors <- FindNeighbors(object = s6_reduced, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
s6_clusters <- FindClusters(object = s6_neighboors)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 926
## Number of edges: 39594
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6567
## Number of communities: 6
## Elapsed time: 0 seconds
head(Idents(s6_clusters), 5) #cluster IDs 1st 5 cells
##  37256  92053 106222 161238 184004 
##      0      0      0      4      2 
## Levels: 0 1 2 3 4 5
#UMAP
s6_umap <- RunUMAP(object = s6_clusters, dims = 1:20)
## 00:30:06 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:30:06 Read 926 rows and found 20 numeric columns
## 00:30:06 Using Annoy for neighbor search, n_neighbors = 30
## 00:30:06 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:30:06 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1615604ef4e
## 00:30:06 Searching Annoy index using 1 thread, search_k = 3000
## 00:30:06 Annoy recall = 100%
## 00:30:06 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:30:07 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:30:07 Commencing optimization for 500 epochs, with 35802 positive edges
## 00:30:08 Optimization finished
DimPlot(s6_umap, reduction = "umap")

#Doublet finder
#pK identification (no ground-truth)
sweep.res.list_s6 <- paramSweep(s6_umap, PCs = 1:20, sct = FALSE)
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_s6 <- summarizeSweep(sweep.res.list_s6, GT = FALSE)
bcmvn_s6 <- find.pK(sweep.stats_s6)

## NULL
ggplot(bcmvn_s6, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()

#0.2
pK <- bcmvn_s6[bcmvn_s6$BCmetric == max(bcmvn_s6$BCmetric), "pK"]
pK <- as.numeric(as.character(pK))
pK
## [1] 0.2
#Homotypic doublet proportion estimate
annotations <- s6_umap@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.05*nrow(s6_umap@meta.data)) ##in project description
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
nExp_poi.adj #expected number of doublets
## [1] 37
#DoubletFinder
s6_finalized <- doubletFinder(s6_umap,
                              PCs = 1:20,
                              pK = pK,
                              nExp = nExp_poi.adj,
                              reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 309 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(s6_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.2_37")

table(s6_finalized@meta.data$DF.classifications_0.25_0.2_37)
## 
## Doublet Singlet 
##      37     889
s6_finalized <- subset(s6_finalized, subset = DF.classifications_0.25_0.2_37 == "Singlet")
table(s6_finalized@meta.data$DF.classifications_0.25_0.2_37)
## 
## Singlet 
##     889
DimPlot(s6_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.2_37")

Sample 7

#Quality control
seurat_s7[["percent.mt"]] <- PercentageFeatureSet(seurat_s7, pattern = "^mt-") 
#View(seurat_s7@meta.data)

VlnPlot(seurat_s7, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat_s7, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s7, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s7, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

seurat_s7.filtered <- subset(seurat_s7, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10)

FeatureScatter(seurat_s7.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s7.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s7.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

VlnPlot(seurat_s7.filtered, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

#Normalization
s7_normalized <- NormalizeData(object = seurat_s7.filtered)
## Normalizing layer: counts
#Integration
s7_integrated <- FindVariableFeatures(object = s7_normalized)
## Finding variable features for layer counts
top10 <- head(VariableFeatures(s7_integrated), 10)
top10
##  [1] "Gzma"               "Gzmb"               "Klra8"             
##  [4] "Gm11290"            "Ccl4"               "Klra4"             
##  [7] "Ccl3"               "ENSMUSG00000121360" "Ly6c2"             
## [10] "Ccl5"
plot1 <- VariableFeaturePlot(s7_integrated)
plot1

plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2

#Scaling
s7_scaled <- ScaleData(object = s7_integrated)
## Centering and scaling data matrix
#Linear dimensionality reduction
s7_reduced <- RunPCA(object = s7_scaled) 
## PC_ 1 
## Positive:  Rora, Ly6e, Ly6a, Inpp4b, Cxcr6, Il7r, Cd226, Tmem176b, Odc1, Gpr183 
##     Furin, Itgb3, Tmem176a, Emb, Hilpda, Dgat1, Lilrb4b, Gm36723, Il2ra, Trgc4 
##     ENSMUSG00000121239, AA467197, Hif1a, S100a4, Rbpj, Ckb, Camk4, Icos, AU020206, Scn1b 
## Negative:  Gzma, Ccl5, Eomes, Sell, Klra8, ENSMUSG00000121359, Itgam, Prf1, Klra4, Ccl3 
##     ENSMUSG00000121360, Klra9, Gzmb, Ccl4, Itga2, Klra7, Klf2, Tyrobp, Lgals1, Serpinb9b 
##     Irf8, Gm4956, Gstm1, Zeb2, Mcam, Itga4, Dusp2, Klra3, Actb, Cdc20b 
## PC_ 2 
## Positive:  Klrb1b, Car2, Ncr1, Tyrobp, Irf8, AW112010, Klrb1f, Serpinb9, Vegfa, Bhlhe40 
##     Plek, Itgax, Sema4c, Lilrb4b, Prf1, Nabp1, Spry2, Tmem176b, Ccr2, Gm36723 
##     Serpina3g, Klri2, Dgat1, Scn1b, Arrdc4, Tmem176a, Nudt4, Gzma, Klrb1a, Maff 
## Negative:  Cd3e, Cd3d, Trac, Cd3g, Trbc2, Cd5, Bcl11b, Trgc1, Trgc2, Ly6c2 
##     Actn1, Dpp4, Slfn1, Gramd3, Dtx4, Ifi27l2a, Ass1, Il21r, Cd6, Peli1 
##     Pitpnm2, Gm8369, Lat, Cd40lg, Thada, Trgv2, Klra1, Cd8a, Klra6, Itk 
## PC_ 3 
## Positive:  Bach2, Gm2682, Foxp1, Atp8b4, Celf2, Dennd4a, Utrn, Aff3, Mgat5, Kdm2b 
##     Gm38190, Vps37b, Btbd11, Spry2, Ern1, Angpt1, Pde1c, Arhgap15, Ulk2, Tcf12 
##     Fryl, Zeb1, Trps1, Sell, Satb1, Stag1, Arhgap26, Arhgap10, Dym, Hook2 
## Negative:  Tmsb4x, Cxcr6, Actg1, Ly6a, Cd8b1, Actb, Themis, S100a6, AA467197, Id2 
##     Cd8a, Lcp1, Gzmk, Lgals1, Gm8369, AW112010, Fam169b, St8sia1, Capg, Fgl2 
##     Furin, Crip1, Adam8, Nol6, Eif5a, Pdcd1, S100a4, Adam19, Gapdh, Ifitm1 
## PC_ 4 
## Positive:  Gzmb, Tnfrsf9, Tnfsf11, Tgfb1, Ybx3, St8sia1, Rbpj, Themis, Egr2, Mif 
##     Chst2, Irf4, Nhlrc2, Art2b, Snhg4, Cd8b1, Zeb2, Hk2, Pdcd1, Cd8a 
##     Nefh, Atp13a3, Sit1, Entpd1, Lgals1, Trerf1, Zfp61, Rnf157, Ralgps2, Gm2449 
## Negative:  Il7r, Cd7, Dusp1, Jun, Trgc4, Ly6e, Fos, S1pr1, Lyst, Zfp36 
##     Gpr183, Fosb, Zfp36l1, Lmo4, Zfp36l2, Klrc1, Sidt1, Tmem176b, Slfn5, Trgc1 
##     Prnp, Dmrta1, Tmem176a, Rflnb, Txnip, Klf3, Senp7, Trgv1, Cxcr4, Ifi213 
## PC_ 5 
## Positive:  Gm12185, Actb, Txnip, Vav3, 9930111J21Rik2, Id2, Epsti1, Styk1, Coro2a, Rab40c 
##     Rgs12, S100a4, Gm36723, Gm36445, Rab2b, Tmsb4x, Gpr34, Cd226, Mfsd8, Gab3 
##     Slc38a9, Irgm2, Pex2, Akap17b, Aak1, AW112010, Ly6e, Luc7l3, Xrcc5, S100a6 
## Negative:  Kdm6b, Nr4a1, Pim1, Nr4a3, Tnfaip3, Nfkbia, Nfkbid, Csrnp1, Fosl2, Dusp5 
##     Vps37b, Nfkbiz, Maff, Bhlhe40, Vegfa, Crem, Rel, Ifrd1, Gadd45b, Zc3h12a 
##     Dusp10, Nup98, Gpr132, Odc1, Cdkn1a, Nr4a2, Sik1, Gm12536, Mxd1, 1700008J07Rik
VizDimLoadings(s7_reduced, dims = 1:2, reduction = "pca")

DimPlot(s7_reduced, reduction = "pca") + NoLegend()

DimHeatmap(s7_reduced, dims = 1:20, cells = 500, balanced = TRUE)
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.

ElbowPlot(s7_reduced)

#Clustering
s7_neighboors <- FindNeighbors(object = s7_reduced, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
s7_clusters <- FindClusters(object = s7_neighboors)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 268
## Number of edges: 11874
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.5993
## Number of communities: 3
## Elapsed time: 0 seconds
head(Idents(s7_clusters), 5) #cluster IDs 1st 5 cells
##  407927  519089  829786 1238414 1285777 
##       1       0       2       0       2 
## Levels: 0 1 2
#UMAP
s7_umap <- RunUMAP(object = s7_clusters, dims = 1:20)
## 00:30:24 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:30:24 Read 268 rows and found 20 numeric columns
## 00:30:24 Using Annoy for neighbor search, n_neighbors = 30
## 00:30:24 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:30:24 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1614cb863cf
## 00:30:24 Searching Annoy index using 1 thread, search_k = 3000
## 00:30:24 Annoy recall = 100%
## 00:30:24 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:30:25 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:30:25 Commencing optimization for 500 epochs, with 9456 positive edges
## 00:30:25 Optimization finished
DimPlot(s7_umap, reduction = "umap")

#Doublet finder
#pK identification (no ground-truth)
sweep.res.list_s7 <- paramSweep(s7_umap, PCs = 1:20, sct = FALSE)
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_s7 <- summarizeSweep(sweep.res.list_s7, GT = FALSE)
bcmvn_s7 <- find.pK(sweep.stats_s7)

## NULL
ggplot(bcmvn_s7, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()

#0.16
pK <- bcmvn_s7[bcmvn_s7$BCmetric == max(bcmvn_s7$BCmetric), "pK"]
pK <- as.numeric(as.character(pK))
pK
## [1] 0.16
#Homotypic doublet proportion estimate
annotations <- s7_umap@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.05*nrow(s7_umap@meta.data)) ##in project description
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
nExp_poi.adj #expected number of doublets
## [1] 9
#DoubletFinder
s7_finalized <- doubletFinder(s7_umap,
                              PCs = 1:20,
                              pK = pK,
                              nExp = nExp_poi.adj,
                              reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 89 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(s7_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.16_9")

table(s7_finalized@meta.data$DF.classifications_0.25_0.16_9)
## 
## Doublet Singlet 
##       9     259
s7_finalized <- subset(s7_finalized, subset = DF.classifications_0.25_0.16_9 == "Singlet")
table(s7_finalized@meta.data$DF.classifications_0.25_0.16_9)
## 
## Singlet 
##     259
DimPlot(s7_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.16_9")

Sample 8

#Quality control
seurat_s8[["percent.mt"]] <- PercentageFeatureSet(seurat_s8, pattern = "^mt-") 
#View(seurat_s8@meta.data)

VlnPlot(seurat_s8, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat_s8, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s8, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s8, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

seurat_s8.filtered <- subset(seurat_s8, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10)

FeatureScatter(seurat_s8.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s8.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm') 
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s8.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

VlnPlot(seurat_s8.filtered, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

#Normalization
s8_normalized <- NormalizeData(object = seurat_s8.filtered)
## Normalizing layer: counts
#VFs
s8_integrated <- FindVariableFeatures(object = s8_normalized)
## Finding variable features for layer counts
top10 <- head(VariableFeatures(s8_integrated), 10)
top10
##  [1] "Gm11290" "Cxcl10"  "Ifitm1"  "Ccl3"    "Ly6c2"   "Trac"    "Ccl4"   
##  [8] "Gzmc"    "Plac8"   "Gzmb"
plot1 <- VariableFeaturePlot(s8_integrated)
plot1

plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2

#Scaling
s8_scaled <- ScaleData(object = s8_integrated)
## Centering and scaling data matrix
#Linear dimensionality reduction
s8_reduced <- RunPCA(object = s8_scaled) 
## PC_ 1 
## Positive:  Ly6e, Il7r, Ly6a, Emb, Cxcr6, Rora, Ifitm2, Gpr183, Lilrb4b, Slfn1 
##     Plac8, Inpp4b, Ltb, Tmem176b, Jaml, Itgb3, Camk4, Cd226, Trgc4, Rbpj 
##     Ikzf2, Ifitm3, Csf3r, Mpeg1, Ptafr, Cd3e, Gm21188, Ifi27l2a, Lilrb4a, Ctss 
## Negative:  Gzma, Prf1, Ccl5, Gzmb, Irf8, Itgam, Cma1, Eomes, Ccl4, Ccl3 
##     ENSMUSG00000121359, Lgals1, Klri2, Klra8, Klf2, ENSMUSG00000121360, Sell, Klra9, Klrg1, Serpinb9b 
##     Cdc20b, Klra4, S1pr5, Plek, Actb, Flna, Serpinb9, Dusp2, Arsb, Klra3 
## PC_ 2 
## Positive:  Il1b, Pla2g7, Csf3r, Mpeg1, Ptafr, F10, Gm21188, Ly6i, Cybb, Clec4a1 
##     Pld4, Ube2l6, Il6ra, Tlr7, Clec4a3, Tbc1d9, Fcgr4, Spi1, Lyz2, Sirpa 
##     Gk, Csf2rb, Marcks, Pirb, Lgmn, ENSMUSG00000120878, Apoe, Ly86, Cd300c2, Cd68 
## Negative:  Il7r, Emb, Tcf7, Inpp4b, Cd7, Gpr183, Cd226, Itga1, Ltb, Rora 
##     Trgc4, Thy1, Lyst, Ikzf2, Cxcr6, Camk4, Trbc2, Cd3e, Xcl1, Cd3d 
##     Tmem176a, Ly6e, Tnfsf10, Gramd3, Cd3g, Icos, Itgb3, Cd160, Tmem176b, Zfp36l1 
## PC_ 3 
## Positive:  Vegfa, Bcl2l11, Dhrs3, Fosl2, Sft2d2, Eomes, Myb, Ccr2, Stat3, Satb1 
##     Car2, Gpr132, Rnf157, Irak2, Tpd52, Cxcr4, Irf8, Tgfb1, Ctla2a, Skil 
##     Dennd4a, Emb, Slc26a11, Sell, Irs2, Bach2, Malt1, Dgat1, Cemip2, Ern1 
## Negative:  Cd3e, Ly6c2, Cd3g, Cd3d, Actb, S100a6, Cd8b1, Cd8a, Trac, Adam19 
##     Trbc2, Pdcd1, Cx3cr1, Trgc1, Jaml, S100a4, Trgc2, Cxcr6, Cd226, Ccnd2 
##     Cd5, Slfn1, Fgl2, Ifit1bl1, Trgv2, Bcl11b, Cd160, Dpp4, Trat1, Vim 
## PC_ 4 
## Positive:  Tmem176b, Tmem176a, Ckb, Adgrg3, Klrb1b, Ccdc184, Asb2, Gm36723, Dscam, Trgc4 
##     Scn1b, Sema6d, Fgl2, Krt83, Adam8, Psd2, 2900026A02Rik, Arrdc4, Lilrb4a, Serpinb1a 
##     Furin, H1f0, Gzmc, Prnp, Klrc1, Cd226, Adgrg5, Cd160, Inpp4b, Cep290 
## Negative:  Cd3e, Oas3, Ifit1bl1, Ifit3, Cd3d, Gm8369, Ifit1, Slfn1, Trac, Dpp4 
##     Cmpk2, Cd8a, Mx1, Ifit3b, Ifi27l2a, Rtp4, Cd8b1, A330040F15Rik, Cd6, Cd3g 
##     Mx2, Slfn5, Phf11c, Ifi208, Ifi213, Ifi206, Ifit2, S1pr1, Ifi209, Cxcl10 
## PC_ 5 
## Positive:  Tcf7, Ctla2a, Osgin1, Pmepa1, Jun, Slc26a11, Fos, Fth1, Cd7, Psap 
##     Tspan13, Cited4, Gpr155, Isg20, Car2, Xcl1, Ftl1, Filip1l, Klra4, Mfsd6 
##     1700017B05Rik, Rgs10, Camk2n1, Spsb1, Ccr2, Eya2, Trdc, Ly6e, Ighm, Rgs16 
## Negative:  Serpinb9, Nr4a1, Mki67, Lgals1, Gzmb, Il12rb2, Sgk1, Kif11, Pclaf, Flna 
##     Birc5, Asf1b, Pola1, Plek, Nfkbia, Cdca3, Top2a, Ckap2l, Actb, Nfkbiz 
##     Prf1, Bard1, Ccl3, Stmn1, Anln, Brip1, Cx3cr1, Spc24, Magt1, Ccna2
VizDimLoadings(s8_reduced, dims = 1:2, reduction = "pca")

DimPlot(s8_reduced, reduction = "pca") + NoLegend()

DimHeatmap(s8_reduced, dims = 1:20, cells = 500, balanced = TRUE)

ElbowPlot(s8_reduced)

#Clustering
s8_neighboors <- FindNeighbors(object = s8_reduced, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
s8_clusters <- FindClusters(object = s8_neighboors)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 640
## Number of edges: 26292
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6579
## Number of communities: 5
## Elapsed time: 0 seconds
head(Idents(s8_clusters), 5) #cluster IDs 1st 5 cells
##  53570 215374 353164 525991 531003 
##      3      0      1      2      3 
## Levels: 0 1 2 3 4
#UMAP
s8_umap <- RunUMAP(object = s8_clusters, dims = 1:20)
## 00:30:37 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:30:37 Read 640 rows and found 20 numeric columns
## 00:30:37 Using Annoy for neighbor search, n_neighbors = 30
## 00:30:37 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:30:37 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1611e4d108b
## 00:30:37 Searching Annoy index using 1 thread, search_k = 3000
## 00:30:37 Annoy recall = 100%
## 00:30:37 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:30:38 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:30:38 Commencing optimization for 500 epochs, with 24348 positive edges
## 00:30:38 Optimization finished
DimPlot(s8_umap, reduction = "umap")

#Doublet finder
#pK identification (no ground-truth)
sweep.res.list_s8 <- paramSweep(s8_umap, PCs = 1:20, sct = FALSE)
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_s8 <- summarizeSweep(sweep.res.list_s8, GT = FALSE)
bcmvn_s8 <- find.pK(sweep.stats_s8)

## NULL
ggplot(bcmvn_s8, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()

#0.15
pK <- bcmvn_s8[bcmvn_s8$BCmetric == max(bcmvn_s8$BCmetric), "pK"]
pK <- as.numeric(as.character(pK))
pK
## [1] 0.15
#Homotypic doublet proportion estimate
annotations <- s8_umap@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.05*nrow(s8_umap@meta.data)) ##in project description
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
nExp_poi.adj #expected number of doublets
## [1] 24
#DoubletFinder
s8_finalized <- doubletFinder(s8_umap,
                              PCs = 1:20,
                              pK = pK,
                              nExp = nExp_poi.adj,
                              reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 213 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(s8_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.15_24")

table(s8_finalized@meta.data$DF.classifications_0.25_0.15_24)
## 
## Doublet Singlet 
##      24     616
s8_finalized <- subset(s8_finalized, subset = DF.classifications_0.25_0.15_24 == "Singlet")
table(s8_finalized@meta.data$DF.classifications_0.25_0.15_24)
## 
## Singlet 
##     616
DimPlot(s8_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.15_24")

Next, I merge the samples into a single object for easier downstream analysis (skipping sample 5 for now).

samples <- list(s2_finalized, s3_finalized, s4_finalized, s6_finalized, s7_finalized, s8_finalized) #skipping s5
merged_object <- merge(s1_finalized, y = samples, add.cell.ids = c("s1", "s2", "s3", "s4", "s6", "s7", "s8"))
#merged_object[["RNA"]] <- split(merged_object[["RNA"]], f = merged_object$orig.ident) #not necessary to include scale.data since integrating downstream it's recalculated

Proceeding with the steps in the Seurat documentation on integration.

merged_object <- NormalizeData(merged_object)
## Normalizing layer: counts.Sample_1
## Normalizing layer: counts.Sample_2
## Normalizing layer: counts.Sample_3
## Normalizing layer: counts.Sample_4
## Normalizing layer: counts.Sample_6
## Normalizing layer: counts.Sample_7
## Normalizing layer: counts.Sample_8
merged_object <- FindVariableFeatures(merged_object)
## Finding variable features for layer counts.Sample_1
## Finding variable features for layer counts.Sample_2
## Finding variable features for layer counts.Sample_3
## Finding variable features for layer counts.Sample_4
## Finding variable features for layer counts.Sample_6
## Finding variable features for layer counts.Sample_7
## Finding variable features for layer counts.Sample_8
merged_object <- ScaleData(merged_object)
## Centering and scaling data matrix
merged_object <- RunPCA(merged_object)
## PC_ 1 
## Positive:  Emb, Il7r, Ly6e, Gpr183, Tcf7, Inpp4b, Cxcr6, Rora, Trgc4, Ltb 
##     Ikzf2, Itgb3, Camk4, Cd3e, Cd28, Lyst, Gramd3, Ly6a, Thy1, Cd3g 
##     Itga1, Tmem176b, Zfp36l1, Cd7, Gpr132, Cd3d, Plac8, Lilrb4b, Tmem176a, Cxcr3 
## Negative:  Gzma, Prf1, Gzmb, Ccl5, Ccl3, Ccl4, Irf8, Itgam, Cma1, Lgals1 
##     Actb, Plek, Itgb2, Klrg1, S1pr5, ENSMUSG00000121359, Emp3, Klra9, Flna, Cdc20b 
##     ENSMUSG00000121360, Zeb2, Ncr1, Itga2, Sgk1, Eomes, Serpinb9b, Klra8, Ccnd2, Klri2 
## PC_ 2 
## Positive:  S100a6, Actb, Ly6c2, Cd52, Cd226, Cxcr6, S100a4, Fgl2, Ly6a, Itgb2 
##     mt-Nd4, Inpp4b, Hopx, Trgc4, Il7r, Ncr1, Cd3e, Cd3g, Ccnd2, Cd160 
##     Tmem176b, Lgals1, Cd3d, Tmem176a, Gm36723, Vim, Slamf7, Jaml, Adam8, Klrc1 
## Negative:  Eomes, Dhrs3, Myb, Bcl2l11, Pmepa1, Stat3, Tgfb1, Cemip2, Ccr2, Ctla2a 
##     Klra4, Slc26a11, Plxnc1, Vegfa, Tspan13, Rnf157, Sft2d2, Car2, Cxcr4, Satb1 
##     Klra8, Tpd52, ENSMUSG00000121360, Skil, Slc3a2, Gstm1, Sell, Smad7, Sult2b1, Bach2 
## PC_ 3 
## Positive:  Serpinb9, Bhlhe40, Vegfa, Nr4a1, Pim1, Irf8, Nfkbiz, Nfkbia, Vps37b, Dusp5 
##     Nr4a3, Nabp1, Ifrd1, Kdm6b, Fosl2, Dgat1, Spry2, Serpinb6b, Icam1, Nfkbid 
##     Klrb1b, Tmem176b, Tmem176a, Mxd1, Lilrb4b, Birc3, Tnfrsf9, Plek, Crem, Gem 
## Negative:  Cd3e, Cd3g, Cd3d, Trgc2, Trac, Trgc1, Trgv2, Fos, Dpp4, S1pr1 
##     Klra6, Slfn1, Gm8369, Actn1, Klf2, Cd8a, Bcl11b, Cd5, Jun, Plcg1 
##     Cd6, Klra1, Trbc2, Ifi208, Hdgfl3, Rgs10, Peli1, Themis, Tcf7, Pmepa1 
## PC_ 4 
## Positive:  Fth1, Pmepa1, Vegfa, AA467197, Plac8, Tspan13, Aldoa, Osgin1, Dhrs3, Cdkn1a 
##     Dgat1, Rbpj, Scn1b, Prdx6, 1700017B05Rik, Spsb1, Odc1, Rgs16, Adam8, Nrarp 
##     Ly6a, Ctla2a, Cish, Tgfb1, Prnp, Itgb3, Actg1, Lgals3, Cemip2, Smox 
## Negative:  Gm2682, Ripor2, Add3, Fos, Arhgap15, Dock10, Pik3r1, Peak1, Rap1gap2, Prkcq 
##     Elmo1, Pitpnc1, Utrn, Dock11, Plek, Itga4, Fosb, Tnik, Atp8b4, Arsb 
##     Ncr1, Scml4, Ccnd3, Dennd4a, Itga2, Atp2b1, Cblb, Epsti1, Klf6, Gm26740 
## PC_ 5 
## Positive:  Cd3e, Trac, Cd3d, Cd3g, Kdm6b, Nr4a3, Nr4a1, Gramd3, Pim1, Fosl2 
##     Vps37b, Nr4a2, Nfkbia, Trgc2, Nfkbid, Ifrd1, Nfkbiz, Cd5, Crem, Dpp4 
##     Trbc2, Dusp5, Trgc1, Trgv2, Slfn1, Cd6, Actn1, Gna13, Icam1, Ccl5 
## Negative:  Tmem176b, Tmem176a, Klrb1b, Ckb, Gm36723, Cited4, Jun, Scn1b, Dscam, Klrb1f 
##     Acpp, Asb2, Adgrg3, Id2, Ncr1, Car2, Gpr34, Prnp, Arrdc4, Sema6d 
##     ENSMUSG00000121239, Gas7, 2900026A02Rik, Fos, 9930111J21Rik2, Vegfc, AU020206, Itgax, Adam8, Krt83
merged_object <- FindNeighbors(merged_object, dims = 1:20, reduction = "pca")
## Computing nearest neighbor graph
## Computing SNN
merged_object <- FindClusters(merged_object, resolution = 2, cluster.name = "unintegrated_clusters")
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5183
## Number of edges: 176050
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6386
## Number of communities: 18
## Elapsed time: 0 seconds
merged_object <- RunUMAP(merged_object, dims = 1:20, reduction = "pca", reduction.name = "umap.unintegrated")
## 00:31:04 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:31:04 Read 5183 rows and found 20 numeric columns
## 00:31:04 Using Annoy for neighbor search, n_neighbors = 30
## 00:31:04 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:31:04 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec16120b57951
## 00:31:04 Searching Annoy index using 1 thread, search_k = 3000
## 00:31:05 Annoy recall = 100%
## 00:31:06 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:31:06 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:31:06 Commencing optimization for 500 epochs, with 212392 positive edges
## 00:31:09 Optimization finished
DimPlot(merged_object, reduction ="umap.unintegrated", group.by = c("orig.ident", "seurat_clusters"))

#Integration
merged_object <- IntegrateLayers(object = merged_object, method = CCAIntegration, orig.reduction = "pca", new.reduction = "integrated.cca",
                                      verbose = FALSE)

merged_object <- JoinLayers(merged_object)
head(merged_object)
##           orig.ident nCount_RNA nFeature_RNA percent.mt RNA_snn_res.0.8
## s1_14310    Sample_1       5267         2239   3.284602               1
## s1_38613    Sample_1       7072         2749   2.969457               0
## s1_44195    Sample_1       4280         1954   2.663551               0
## s1_93453    Sample_1       4943         1877   4.733967               3
## s1_157761   Sample_1       4008         1729   6.087824               0
## s1_214830   Sample_1       7224         2644   4.332780               3
## s1_230825   Sample_1       5366         2217   4.714871               3
## s1_276993   Sample_1       5506         2442   2.760625               5
## s1_300567   Sample_1       3749         1862   3.974393               2
## s1_376984   Sample_1       5816         2263   5.140990               0
##           seurat_clusters pANN_0.25_0.29_67 DF.classifications_0.25_0.29_67
## s1_14310                0         0.2087227                         Singlet
## s1_38613               11         0.3255452                         Singlet
## s1_44195                1         0.2040498                         Singlet
## s1_93453                8         0.1869159                         Singlet
## s1_157761               1         0.2056075                         Singlet
## s1_214830               9         0.1666667                         Singlet
## s1_230825               9         0.2196262                         Singlet
## s1_276993               6         0.3146417                         Singlet
## s1_300567               3         0.1121495                         Singlet
## s1_376984               1         0.2274143                         Singlet
##           pANN_0.25_0.11_34 DF.classifications_0.25_0.11_34 pANN_0.25_0.25_31
## s1_14310                 NA                            <NA>                NA
## s1_38613                 NA                            <NA>                NA
## s1_44195                 NA                            <NA>                NA
## s1_93453                 NA                            <NA>                NA
## s1_157761                NA                            <NA>                NA
## s1_214830                NA                            <NA>                NA
## s1_230825                NA                            <NA>                NA
## s1_276993                NA                            <NA>                NA
## s1_300567                NA                            <NA>                NA
## s1_376984                NA                            <NA>                NA
##           DF.classifications_0.25_0.25_31 pANN_0.25_0.3_3
## s1_14310                             <NA>              NA
## s1_38613                             <NA>              NA
## s1_44195                             <NA>              NA
## s1_93453                             <NA>              NA
## s1_157761                            <NA>              NA
## s1_214830                            <NA>              NA
## s1_230825                            <NA>              NA
## s1_276993                            <NA>              NA
## s1_300567                            <NA>              NA
## s1_376984                            <NA>              NA
##           DF.classifications_0.25_0.3_3 pANN_0.25_0.2_37
## s1_14310                           <NA>               NA
## s1_38613                           <NA>               NA
## s1_44195                           <NA>               NA
## s1_93453                           <NA>               NA
## s1_157761                          <NA>               NA
## s1_214830                          <NA>               NA
## s1_230825                          <NA>               NA
## s1_276993                          <NA>               NA
## s1_300567                          <NA>               NA
## s1_376984                          <NA>               NA
##           DF.classifications_0.25_0.2_37 pANN_0.25_0.16_9
## s1_14310                            <NA>               NA
## s1_38613                            <NA>               NA
## s1_44195                            <NA>               NA
## s1_93453                            <NA>               NA
## s1_157761                           <NA>               NA
## s1_214830                           <NA>               NA
## s1_230825                           <NA>               NA
## s1_276993                           <NA>               NA
## s1_300567                           <NA>               NA
## s1_376984                           <NA>               NA
##           DF.classifications_0.25_0.16_9 pANN_0.25_0.15_24
## s1_14310                            <NA>                NA
## s1_38613                            <NA>                NA
## s1_44195                            <NA>                NA
## s1_93453                            <NA>                NA
## s1_157761                           <NA>                NA
## s1_214830                           <NA>                NA
## s1_230825                           <NA>                NA
## s1_276993                           <NA>                NA
## s1_300567                           <NA>                NA
## s1_376984                           <NA>                NA
##           DF.classifications_0.25_0.15_24 unintegrated_clusters
## s1_14310                             <NA>                     0
## s1_38613                             <NA>                    11
## s1_44195                             <NA>                     1
## s1_93453                             <NA>                     8
## s1_157761                            <NA>                     1
## s1_214830                            <NA>                     9
## s1_230825                            <NA>                     9
## s1_276993                            <NA>                     6
## s1_300567                            <NA>                     3
## s1_376984                            <NA>                     1
tail(merged_object)
##             orig.ident nCount_RNA nFeature_RNA percent.mt RNA_snn_res.0.8
## s8_55985941   Sample_8       3697         1681   7.817149               0
## s8_56094760   Sample_8       4949         2041   4.485755               3
## s8_56193215   Sample_8       2666         1376   1.312828               4
## s8_56197150   Sample_8       2780         1503   4.820144               3
## s8_56216117   Sample_8       4579         2018   3.734440               3
## s8_56294783   Sample_8       4866         2143   5.219893               0
## s8_56317073   Sample_8       2623         1308   5.871140               0
## s8_56326298   Sample_8       3291         1551   5.408690               1
## s8_56483446   Sample_8       3930         1913   4.249364               0
## s8_56571947   Sample_8       3873         1821   4.441002               0
##             seurat_clusters pANN_0.25_0.29_67 DF.classifications_0.25_0.29_67
## s8_55985941               3                NA                            <NA>
## s8_56094760               6                NA                            <NA>
## s8_56193215               5                NA                            <NA>
## s8_56197150               6                NA                            <NA>
## s8_56216117               6                NA                            <NA>
## s8_56294783               2                NA                            <NA>
## s8_56317073               2                NA                            <NA>
## s8_56326298               4                NA                            <NA>
## s8_56483446               3                NA                            <NA>
## s8_56571947               3                NA                            <NA>
##             pANN_0.25_0.11_34 DF.classifications_0.25_0.11_34 pANN_0.25_0.25_31
## s8_55985941                NA                            <NA>                NA
## s8_56094760                NA                            <NA>                NA
## s8_56193215                NA                            <NA>                NA
## s8_56197150                NA                            <NA>                NA
## s8_56216117                NA                            <NA>                NA
## s8_56294783                NA                            <NA>                NA
## s8_56317073                NA                            <NA>                NA
## s8_56326298                NA                            <NA>                NA
## s8_56483446                NA                            <NA>                NA
## s8_56571947                NA                            <NA>                NA
##             DF.classifications_0.25_0.25_31 pANN_0.25_0.3_3
## s8_55985941                            <NA>              NA
## s8_56094760                            <NA>              NA
## s8_56193215                            <NA>              NA
## s8_56197150                            <NA>              NA
## s8_56216117                            <NA>              NA
## s8_56294783                            <NA>              NA
## s8_56317073                            <NA>              NA
## s8_56326298                            <NA>              NA
## s8_56483446                            <NA>              NA
## s8_56571947                            <NA>              NA
##             DF.classifications_0.25_0.3_3 pANN_0.25_0.2_37
## s8_55985941                          <NA>               NA
## s8_56094760                          <NA>               NA
## s8_56193215                          <NA>               NA
## s8_56197150                          <NA>               NA
## s8_56216117                          <NA>               NA
## s8_56294783                          <NA>               NA
## s8_56317073                          <NA>               NA
## s8_56326298                          <NA>               NA
## s8_56483446                          <NA>               NA
## s8_56571947                          <NA>               NA
##             DF.classifications_0.25_0.2_37 pANN_0.25_0.16_9
## s8_55985941                           <NA>               NA
## s8_56094760                           <NA>               NA
## s8_56193215                           <NA>               NA
## s8_56197150                           <NA>               NA
## s8_56216117                           <NA>               NA
## s8_56294783                           <NA>               NA
## s8_56317073                           <NA>               NA
## s8_56326298                           <NA>               NA
## s8_56483446                           <NA>               NA
## s8_56571947                           <NA>               NA
##             DF.classifications_0.25_0.16_9 pANN_0.25_0.15_24
## s8_55985941                           <NA>         0.0468750
## s8_56094760                           <NA>         0.1796875
## s8_56193215                           <NA>         0.1328125
## s8_56197150                           <NA>         0.1640625
## s8_56216117                           <NA>         0.2265625
## s8_56294783                           <NA>         0.0937500
## s8_56317073                           <NA>         0.0468750
## s8_56326298                           <NA>         0.0468750
## s8_56483446                           <NA>         0.1406250
## s8_56571947                           <NA>         0.0468750
##             DF.classifications_0.25_0.15_24 unintegrated_clusters
## s8_55985941                         Singlet                     3
## s8_56094760                         Singlet                     6
## s8_56193215                         Singlet                     5
## s8_56197150                         Singlet                     6
## s8_56216117                         Singlet                     6
## s8_56294783                         Singlet                     2
## s8_56317073                         Singlet                     2
## s8_56326298                         Singlet                     4
## s8_56483446                         Singlet                     3
## s8_56571947                         Singlet                     3
merged_object <- FindNeighbors(merged_object, reduction = "integrated.cca", dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
merged_object <- FindClusters(merged_object, resolution = 0.5, cluster.name = "integrated_clusters") #keep lower; increase based on cond
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5183
## Number of edges: 222642
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8181
## Number of communities: 10
## Elapsed time: 0 seconds
merged_object <- RunUMAP(merged_object, dims = 1:20, reduction = "integrated.cca")
## 00:31:56 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:31:56 Read 5183 rows and found 20 numeric columns
## 00:31:56 Using Annoy for neighbor search, n_neighbors = 30
## 00:31:56 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:31:56 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1616a2fc59d
## 00:31:56 Searching Annoy index using 1 thread, search_k = 3000
## 00:31:57 Annoy recall = 100%
## 00:31:57 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:31:58 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:31:58 Commencing optimization for 500 epochs, with 218726 positive edges
## 00:32:01 Optimization finished
DimPlot(merged_object, reduction = "umap", group.by = c("orig.ident", "seurat_clusters")) #should be c("orig.ident","seurat_annotations")

DimPlot(merged_object, reduction = "umap", split.by = "orig.ident")

This is a plot regarding the percentage of samples in each cluster.

#plot as percent rather than just number 
cell_counts <- table(merged_object$orig.ident, merged_object$integrated_clusters)
cell_perc <- prop.table(cell_counts, margin = 1)*100
percentages_df <- melt(cell_perc, varnames = c("Sample", "Cluster"), value.name = "Percentage")

ggplot(percentages_df, aes(x = as.factor(Cluster), y = Percentage, fill = Sample)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  labs(title = "Percentage of cells per sample in each cluster",
       x = "Cluster",
       y = "Percentage of Cells") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Finding all markers

#Finding ALL MARKERS for all clusters
all_markers <- FindAllMarkers(merged_object, only.pos = T)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
top_markers <- all_markers %>%
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 20)
print(top_markers, n = 200)
## # A tibble: 200 × 7
## # Groups:   cluster [10]
##         p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene              
##         <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>             
##   1 1.77e- 35       4.61 0.076 0.012 3.13e- 31 0       Ccl9              
##   2 1.93e- 16       4.01 0.025 0.002 3.42e- 12 0       Cdc42ep1          
##   3 4.50e- 83       3.98 0.138 0.012 7.97e- 79 0       Gpx8              
##   4 4.19e- 10       3.52 0.019 0.003 7.43e-  6 0       ENSMUSG00000121073
##   5 7.75e-126       3.23 0.272 0.045 1.37e-121 0       Cx3cr1            
##   6 3.96e- 10       3.11 0.02  0.003 7.01e-  6 0       Slc2a10           
##   7 4.77e- 22       2.85 0.042 0.005 8.45e- 18 0       Rhpn1             
##   8 6.53e- 24       2.74 0.052 0.008 1.16e- 19 0       Adrb1             
##   9 1.28e-  7       2.69 0.017 0.003 2.27e-  3 0       Cerox1            
##  10 2.20e- 12       2.67 0.026 0.004 3.90e-  8 0       Tubb2b            
##  11 3.31e-152       2.67 0.389 0.087 5.87e-148 0       Cdc20b            
##  12 4.37e- 18       2.65 0.045 0.009 7.73e- 14 0       Cym               
##  13 2.95e-  6       2.56 0.011 0.002 5.22e-  2 0       Dagla             
##  14 9.47e- 22       2.53 0.053 0.01  1.68e- 17 0       Ptgs1             
##  15 0               2.52 0.975 0.588 0         0       Gzmb              
##  16 7.92e-  6       2.48 0.012 0.002 1.40e-  1 0       Tnfsfm13          
##  17 3.48e-240       2.42 0.598 0.154 6.16e-236 0       Cma1              
##  18 2.93e-178       2.39 0.509 0.145 5.20e-174 0       S1pr5             
##  19 6.60e- 20       2.36 0.061 0.015 1.17e- 15 0       Top1mt            
##  20 1.66e-203       2.36 0.558 0.156 2.94e-199 0       Klrg1             
##  21 8.99e-  8       3.96 0.011 0.001 1.59e-  3 1       Ccn5              
##  22 3.19e-  5       2.46 0.011 0.002 5.65e-  1 1       4921504E06Rik     
##  23 9.96e-  5       2.25 0.011 0.002 1   e+  0 1       Gm45495           
##  24 1.27e-  3       2.13 0.01  0.003 1   e+  0 1       Cfap141           
##  25 6.39e-  4       2.13 0.01  0.003 1   e+  0 1       Noxo1             
##  26 1.43e-  3       1.96 0.012 0.004 1   e+  0 1       Gm2582            
##  27 1.57e-  9       1.77 0.045 0.016 2.78e-  5 1       Kcnc1             
##  28 3.44e-  4       1.71 0.011 0.003 1   e+  0 1       Ptprq             
##  29 5.53e-  3       1.67 0.011 0.004 1   e+  0 1       ENSMUSG00000120350
##  30 3.83e-  9       1.67 0.038 0.013 6.77e-  5 1       Clip3             
##  31 1.03e-  7       1.66 0.035 0.013 1.82e-  3 1       Hemgn             
##  32 5.25e-  4       1.64 0.014 0.005 1   e+  0 1       Rassf4            
##  33 5.94e-  3       1.63 0.012 0.005 1   e+  0 1       A930001C03Rik     
##  34 7.86e-  3       1.60 0.011 0.004 1   e+  0 1       4930549G23Rik     
##  35 4.80e-  3       1.56 0.014 0.006 1   e+  0 1       Gm31616           
##  36 4.39e- 12       1.55 0.061 0.022 7.77e-  8 1       Fndc4             
##  37 5.17e-  3       1.53 0.017 0.008 1   e+  0 1       Gm15787           
##  38 5.51e-  3       1.52 0.011 0.004 1   e+  0 1       ENSMUSG00000120755
##  39 8.45e- 12       1.48 0.057 0.02  1.50e-  7 1       Ifngas1           
##  40 9.92e-  3       1.46 0.014 0.006 1   e+  0 1       ENSMUSG00000120744
##  41 1.91e- 12       3.65 0.025 0.003 3.38e-  8 2       Ttll7             
##  42 1.92e- 21       3.38 0.041 0.005 3.41e- 17 2       Misp3             
##  43 5.55e- 14       2.83 0.032 0.005 9.82e- 10 2       Ripply3           
##  44 8.50e-  6       2.65 0.014 0.003 1.50e-  1 2       Gm47027           
##  45 4.29e- 59       2.57 0.181 0.039 7.59e- 55 2       Spsb1             
##  46 8.55e-  6       2.52 0.014 0.003 1.51e-  1 2       Pmepa1os          
##  47 7.97e- 54       2.42 0.175 0.04  1.41e- 49 2       Dzank1            
##  48 1.51e-  5       2.40 0.012 0.002 2.68e-  1 2       Gm26644           
##  49 1.10e- 96       2.32 0.36  0.102 1.95e- 92 2       Osgin1            
##  50 3.01e-  9       2.31 0.027 0.006 5.32e-  5 2       Scn5a             
##  51 1.49e-  5       2.28 0.018 0.005 2.64e-  1 2       Bub1b             
##  52 1.16e-  3       2.25 0.012 0.004 1   e+  0 2       Gm16093           
##  53 1.60e- 11       2.24 0.044 0.012 2.83e-  7 2       Kyat3             
##  54 1.82e-  4       2.21 0.011 0.002 1   e+  0 2       ENSMUSG00000120629
##  55 1.38e-  3       2.08 0.011 0.003 1   e+  0 2       Gm17749           
##  56 2.49e-  3       2.06 0.013 0.004 1   e+  0 2       Dpysl5            
##  57 3.13e-  5       2.04 0.011 0.002 5.55e-  1 2       Apln              
##  58 3.47e-189       2.03 0.74  0.289 6.14e-185 2       Pmepa1            
##  59 1.39e-  3       2.02 0.011 0.003 1   e+  0 2       Ago4              
##  60 1.27e- 35       2.02 0.162 0.049 2.25e- 31 2       Osbpl1a           
##  61 8.06e-  4       2.89 0.011 0.002 1   e+  0 3       Gm45184           
##  62 3.39e-  5       2.63 0.013 0.002 6.01e-  1 3       Kcnt2             
##  63 3.47e-  5       2.62 0.013 0.002 6.14e-  1 3       Mab21l3           
##  64 6.00e-  4       2.46 0.013 0.003 1   e+  0 3       Gm2449            
##  65 9.60e-  3       2.39 0.011 0.003 1   e+  0 3       Fosl1             
##  66 6.64e-210       2.27 0.989 0.644 1.18e-205 3       Nr4a1             
##  67 5.42e-  5       2.26 0.017 0.004 9.61e-  1 3       Gm48302           
##  68 6.57e-  3       2.26 0.011 0.003 1   e+  0 3       Areg              
##  69 3.94e-  3       2.24 0.013 0.004 1   e+  0 3       Cd163             
##  70 8.02e-  4       2.20 0.011 0.002 1   e+  0 3       Ube2u             
##  71 8.14e-  4       2.10 0.011 0.002 1   e+  0 3       Lrrc19            
##  72 1.09e-  3       2.01 0.015 0.004 1   e+  0 3       Lsmem1            
##  73 9.75e-  3       1.96 0.011 0.003 1   e+  0 3       Hnf1aos1          
##  74 3.58e-  3       1.91 0.015 0.005 1   e+  0 3       Spag17            
##  75 9.28e-  3       1.91 0.015 0.005 1   e+  0 3       ENSMUSG00000121059
##  76 9.75e-  3       1.89 0.011 0.003 1   e+  0 3       Trbj1-7           
##  77 7.01e-  4       1.87 0.015 0.004 1   e+  0 3       Smtn              
##  78 4.18e-  3       1.85 0.011 0.003 1   e+  0 3       Gm5134            
##  79 2.49e-  4       1.85 0.015 0.003 1   e+  0 3       ENSMUSG00000121375
##  80 2.51e- 58       1.84 0.43  0.159 4.45e- 54 3       Gadd45b           
##  81 3.79e- 15       8.11 0.013 0     6.71e- 11 4       Ffar2             
##  82 1.93e-105       7.13 0.115 0.001 3.41e-101 4       Ccdc184           
##  83 3.44e-236       6.31 0.268 0.004 6.09e-232 4       Krt83             
##  84 0               6.31 0.508 0.009 0         4       Tmem176a          
##  85 0               6.05 0.595 0.014 0         4       Tmem176b          
##  86 3.91e- 75       5.88 0.131 0.008 6.92e- 71 4       Gzmc              
##  87 4.73e- 15       5.68 0.015 0     8.38e- 11 4       Olfr60            
##  88 1.73e-177       5.43 0.227 0.006 3.06e-173 4       Asb2              
##  89 4.83e- 27       5.29 0.033 0.001 8.55e- 23 4       Arnt2             
##  90 7.34e- 88       5.22 0.111 0.003 1.30e- 83 4       Septin8           
##  91 1.22e-272       5.21 0.362 0.012 2.16e-268 4       Ckb               
##  92 4.33e-169       5.16 0.22  0.007 7.67e-165 4       Gpr34             
##  93 1.92e- 59       4.90 0.074 0.002 3.39e- 55 4       Tnfrsf25          
##  94 5.50e-  9       4.79 0.011 0     9.74e-  5 4       Plxna1            
##  95 1.17e-186       4.77 0.261 0.01  2.07e-182 4       Adgrg3            
##  96 2.23e-189       4.72 0.318 0.019 3.95e-185 4       Dscam             
##  97 3.29e- 13       4.65 0.015 0     5.83e-  9 4       Sspo              
##  98 1.65e-113       4.54 0.17  0.008 2.92e-109 4       Podnl1            
##  99 3.21e- 75       4.52 0.115 0.006 5.68e- 71 4       Psd2              
## 100 1.61e- 16       4.49 0.022 0.001 2.84e- 12 4       Gm1965            
## 101 9.37e- 54       9.51 0.049 0     1.66e- 49 5       Tdrp              
## 102 3.17e-278       8.59 0.271 0.001 5.61e-274 5       Trgv2             
## 103 2.58e- 26       8.09 0.023 0     4.57e- 22 5       Traj18            
## 104 4.59e- 18       8.01 0.016 0     8.12e- 14 5       1700021A07Rik     
## 105 1.53e- 12       7.34 0.01  0     2.70e-  8 5       Trgv4             
## 106 0               6.85 0.414 0.008 0         5       Trgc1             
## 107 3.09e- 91       6.81 0.107 0.002 5.47e- 87 5       Izumo1r           
## 108 0               6.71 0.417 0.006 0         5       Trgc2             
## 109 4.93e- 18       6.21 0.018 0     8.73e- 14 5       Igsf23            
## 110 3.90e- 21       6.02 0.023 0     6.90e- 17 5       Trbv12-2          
## 111 0               5.86 0.943 0.015 0         5       Cd3e              
## 112 7.29e- 63       5.72 0.073 0.001 1.29e- 58 5       Cd40lg            
## 113 5.67e- 10       5.68 0.01  0     1.00e-  5 5       Gm4208            
## 114 1.97e- 26       5.53 0.029 0     3.48e- 22 5       A630023P12Rik     
## 115 2.21e- 49       5.47 0.065 0.002 3.91e- 45 5       Srsf12            
## 116 1.21e- 16       5.27 0.021 0.001 2.13e- 12 5       Adh1              
## 117 2.24e- 23       5.11 0.031 0.001 3.97e- 19 5       Ldhb              
## 118 6.84e-203       5.04 0.292 0.011 1.21e-198 5       Nsg2              
## 119 0               5.03 0.789 0.035 0         5       Cd3d              
## 120 1.06e- 27       5.02 0.039 0.001 1.88e- 23 5       Als2cl            
## 121 2.79e- 14       7.50 0.011 0     4.95e- 10 6       Masp1             
## 122 2.79e- 14       7.28 0.011 0     4.95e- 10 6       Gm47761           
## 123 2.96e- 18       7.20 0.023 0     5.25e- 14 6       Ppp1r1a           
## 124 3.01e- 18       6.64 0.023 0     5.33e- 14 6       Atp7b             
## 125 1.24e-  7       6.46 0.011 0     2.19e-  3 6       Fam131c           
## 126 3.27e- 11       6.38 0.023 0.001 5.79e-  7 6       Kcnk13            
## 127 3.05e- 18       6.16 0.023 0     5.41e- 14 6       Gm20395           
## 128 2.21e-  5       6.07 0.011 0     3.90e-  1 6       Ppp4r4            
## 129 1.24e-  7       6.05 0.011 0     2.19e-  3 6       Mybphl            
## 130 2.21e-  5       5.96 0.011 0     3.90e-  1 6       1700069L16Rik     
## 131 1.24e-  7       5.89 0.011 0     2.19e-  3 6       A830019P07Rik     
## 132 1.24e-  7       5.86 0.011 0     2.19e-  3 6       4930535L15Rik     
## 133 2.21e-  5       5.83 0.011 0     3.90e-  1 6       Gm11168           
## 134 2.21e-  5       5.82 0.011 0     3.90e-  1 6       6820408C15Rik     
## 135 2.21e-  5       5.81 0.011 0     3.90e-  1 6       Gm49766           
## 136 2.21e-  5       5.81 0.011 0     3.90e-  1 6       Asb11             
## 137 1.25e-  7       5.80 0.011 0     2.22e-  3 6       Olfr3             
## 138 2.21e-  5       5.77 0.011 0     3.90e-  1 6       9130409I23Rik     
## 139 2.21e-  5       5.76 0.011 0     3.90e-  1 6       Leat1             
## 140 2.21e-  5       5.74 0.011 0     3.90e-  1 6       Wdr93             
## 141 0              10.0  0.86  0.003 0         7       Cd8b1             
## 142 0               8.73 0.884 0.006 0         7       Cd8a              
## 143 1.12e-128       8.52 0.163 0.001 1.99e-124 7       Adgrg1            
## 144 8.19e- 28       8.37 0.023 0     1.45e- 23 7       Lgi1              
## 145 8.19e- 28       8.34 0.023 0     1.45e- 23 7       Mpzl2             
## 146 9.78e- 95       8.04 0.14  0.001 1.73e- 90 7       Zbtb32            
## 147 1.84e-131       7.98 0.256 0.003 3.27e-127 7       Irf4              
## 148 5.39e-186       7.97 0.256 0.001 9.54e-182 7       Gm44174           
## 149 0               7.86 0.791 0.006 0         7       Themis            
## 150 4.34e- 22       7.36 0.047 0.001 7.68e- 18 7       1500009L16Rik     
## 151 1.40e-100       7.15 0.209 0.002 2.48e- 96 7       BE692007          
## 152 1.76e- 14       7.12 0.023 0     3.12e- 10 7       Rhox4d            
## 153 6.30e-183       7.03 0.419 0.005 1.12e-178 7       Havcr2            
## 154 4.00e- 53       7.00 0.093 0.001 7.08e- 49 7       Galnt9            
## 155 4.55e- 14       6.97 0.047 0.001 8.05e- 10 7       Esm1              
## 156 1.80e- 14       6.90 0.023 0     3.19e- 10 7       Kcnmb4            
## 157 0               6.84 0.651 0.006 0         7       Pdcd1             
## 158 5.33e- 10       6.52 0.023 0     9.44e-  6 7       Ptprn             
## 159 5.33e- 10       6.51 0.023 0     9.44e-  6 7       Gm36736           
## 160 2.44e- 26       6.47 0.116 0.004 4.32e- 22 7       Cd4               
## 161 2.68e-142       9.80 0.125 0     4.74e-138 8       Esco2             
## 162 3.34e-192       9.27 0.292 0.001 5.91e-188 8       Ccna2             
## 163 1.19e- 48       9.24 0.042 0     2.10e- 44 8       Nek2              
## 164 2.98e- 48       8.72 0.083 0     5.27e- 44 8       Hmmr              
## 165 0               8.71 0.625 0.004 0         8       Pclaf             
## 166 2.63e-151       8.45 0.167 0     4.65e-147 8       E2f8              
## 167 3.61e-293       8.37 0.417 0.001 6.40e-289 8       Cdc6              
## 168 4.89e- 17       8.30 0.042 0     8.66e- 13 8       Pdzk1             
## 169 5.46e- 64       8.16 0.083 0     9.67e- 60 8       Ska1              
## 170 5.62e-131       7.92 0.208 0.001 9.94e-127 8       E2f7              
## 171 5.72e- 25       7.81 0.042 0     1.01e- 20 8       Matn2             
## 172 8.99e- 54       7.78 0.125 0.001 1.59e- 49 8       Ccnb2             
## 173 5.97e- 25       7.62 0.042 0     1.06e- 20 8       Gm32880           
## 174 5.90e-125       7.59 0.333 0.003 1.05e-120 8       Birc5             
## 175 4.89e- 17       7.38 0.042 0     8.66e- 13 8       Cfap58            
## 176 4.43e-121       7.34 0.292 0.002 7.84e-117 8       Ckap2l            
## 177 4.89e- 17       7.21 0.042 0     8.66e- 13 8       Dlk1              
## 178 7.04e- 48       7.19 0.125 0.001 1.25e- 43 8       Cenpf             
## 179 3.52e-164       7.06 0.292 0.001 6.23e-160 8       Mcm10             
## 180 8.48e-246       6.99 0.708 0.007 1.50e-241 8       E2f1              
## 181 7.82e-227      14.0  0.2   0     1.38e-222 9       Ccl2              
## 182 2.58e-269      13.7  0.333 0     4.57e-265 9       Ptgs2             
## 183 7.82e-227      13.0  0.2   0     1.38e-222 9       Fcgr1             
## 184 0              12.7  0.933 0.003 0         9       Il1b              
## 185 0              12.6  0.533 0     0         9       Csf2rb            
## 186 7.13e-152      12.5  0.133 0     1.26e-147 9       Sirpb1c           
## 187 1.85e-271      12.4  0.4   0.001 3.28e-267 9       Thbs1             
## 188 3.16e- 44      11.8  0.133 0.001 5.59e- 40 9       Slpi              
## 189 1.48e-136      11.8  0.2   0     2.62e-132 9       Clec4n            
## 190 0              11.7  0.8   0.001 0         9       Mpeg1             
## 191 7.02e- 77      11.7  0.067 0     1.24e- 72 9       Slfn4             
## 192 7.82e-227      11.6  0.2   0     1.38e-222 9       Tlr7              
## 193 0              11.5  0.733 0.002 0         9       Cybb              
## 194 2.61e-226      11.4  0.4   0.001 4.62e-222 9       Il1rn             
## 195 0              11.3  0.667 0.001 0         9       Csf1r             
## 196 0              11.3  0.333 0     0         9       Csf3r             
## 197 0              11.3  0.6   0.001 0         9       Ccr1              
## 198 0              11.3  0.467 0     0         9       C3                
## 199 0              11.1  0.467 0     0         9       F10               
## 200 0              11.1  0.6   0.002 0         9       Cd14

Conserved markers by clusters

Cluster 0

markers_c0 <- FindConservedMarkers(merged_object, ident.1 = 0, grouping.var = "orig.ident")
## Testing group Sample_4: (0) vs (2, 1, 3, 4, 5, 7)
## Testing group Sample_7: (0) vs (4, 3, 5, 2, 1, 7, 6)
## Testing group Sample_8: (0) vs (4, 7, 5, 2, 1, 9, 3, 6, 8)
## Testing group Sample_3: (0) vs (5, 1, 2, 4, 3, 7, 6, 8, 9)
## Testing group Sample_2: (0) vs (1, 3, 2, 5, 4, 6, 9, 7, 8)
## Testing group Sample_6: (0) vs (4, 1, 5, 2, 7, 3, 8, 6, 9)
## Testing group Sample_1: (0) vs (1, 2, 4, 3, 6, 5, 7, 8, 9)
head(markers_c0)
##        Sample_4_p_val Sample_4_avg_log2FC Sample_4_pct.1 Sample_4_pct.2
## Gzma     1.215047e-12           1.6655519          1.000          0.747
## Gzmb     5.411346e-12           2.3091115          1.000          0.526
## Tmsb4x   1.745308e-11           0.9833812          1.000          1.000
## Prf1     2.605209e-14           2.3996941          1.000          0.674
## Actb     6.355907e-11           1.1465537          1.000          1.000
## Emb      8.824234e-07          -3.9337882          0.133          0.653
##        Sample_4_p_val_adj Sample_7_p_val Sample_7_avg_log2FC Sample_7_pct.1
## Gzma         2.151362e-08   1.027687e-11           2.7644970          0.962
## Gzmb         9.581330e-08   1.822884e-15           3.8989895          0.923
## Tmsb4x       3.090242e-07   1.425675e-03           0.6393013          1.000
## Prf1         4.612783e-10   3.557915e-11           2.6507811          0.962
## Actb         1.125377e-06   7.407543e-08           1.1707625          1.000
## Emb          1.562419e-02   1.081180e-08          -2.8772127          0.346
##        Sample_7_pct.2 Sample_7_p_val_adj Sample_8_p_val Sample_8_avg_log2FC
## Gzma            0.635       1.819622e-07   7.639380e-57           1.7817571
## Gzmb            0.318       3.227599e-11   1.856060e-53           2.6930772
## Tmsb4x          1.000       1.000000e+00   3.685610e-38           0.8943607
## Prf1            0.511       6.299643e-07   3.744258e-48           2.0622162
## Actb            1.000       1.311580e-03   6.413922e-41           1.0543377
## Emb             0.841       1.914337e-04   4.428778e-34          -3.7753774
##        Sample_8_pct.1 Sample_8_pct.2 Sample_8_p_val_adj Sample_3_p_val
## Gzma            1.000          0.784       1.352629e-52   6.591215e-61
## Gzmb            1.000          0.532       3.286340e-49   8.861015e-63
## Tmsb4x          1.000          1.000       6.525742e-34   5.382758e-58
## Prf1            0.974          0.673       6.629584e-44   1.138935e-56
## Actb            1.000          0.998       1.135649e-36   4.943648e-54
## Emb             0.188          0.729       7.841594e-30   6.967848e-43
##        Sample_3_avg_log2FC Sample_3_pct.1 Sample_3_pct.2 Sample_3_p_val_adj
## Gzma              1.308551          0.995          0.867       1.167040e-56
## Gzmb              2.620633          0.977          0.597       1.568931e-58
## Tmsb4x            1.026097          1.000          1.000       9.530711e-54
## Prf1              1.798592          0.991          0.681       2.016599e-52
## Actb              1.023439          1.000          1.000       8.753223e-50
## Emb              -3.939754          0.156          0.679       1.233727e-38
##        Sample_2_p_val Sample_2_avg_log2FC Sample_2_pct.1 Sample_2_pct.2
## Gzma     5.195224e-65           1.2633827          1.000          0.885
## Gzmb     1.257317e-59           2.3616030          0.966          0.612
## Tmsb4x   2.013274e-64           0.9987883          1.000          1.000
## Prf1     4.482487e-59           1.7510673          0.971          0.672
## Actb     8.633681e-59           1.0514522          1.000          0.997
## Emb      1.659946e-42          -3.5009636          0.172          0.675
##        Sample_2_p_val_adj Sample_6_p_val Sample_6_avg_log2FC Sample_6_pct.1
## Gzma         9.198663e-61   1.222551e-68           1.3095673          1.000
## Gzmb         2.226205e-55   1.301391e-76           2.3452182          0.992
## Tmsb4x       3.564704e-60   2.738814e-69           0.8986653          1.000
## Prf1         7.936692e-55   5.136174e-68           1.7117577          0.992
## Actb         1.528680e-54   6.939773e-58           0.9783775          1.000
## Emb          2.939100e-38   2.492018e-47          -3.6481565          0.131
##        Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val Sample_1_avg_log2FC
## Gzma            0.865       2.164650e-64  8.252536e-119           1.3028958
## Gzmb            0.590       2.304244e-72  7.981280e-117           2.4144417
## Tmsb4x          1.000       4.849343e-65  4.468515e-107           0.8454760
## Prf1            0.739       9.094110e-64  4.594595e-106           1.6442724
## Actb            1.000       1.228756e-53   9.625332e-99           0.9857227
## Emb             0.667       4.412367e-43   4.121046e-83          -3.3484939
##        Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj     max_pval
## Gzma            1.000          0.897      1.461194e-114 1.027687e-11
## Gzmb            0.963          0.650      1.413166e-112 5.411346e-12
## Tmsb4x          1.000          1.000      7.911952e-103 1.425675e-03
## Prf1            0.987          0.751      8.135189e-102 3.557915e-11
## Actb            1.000          1.000       1.704261e-94 7.407543e-08
## Emb             0.152          0.682       7.296725e-79 8.824234e-07
##        minimump_p_val
## Gzma    5.776776e-118
## Gzmb    5.586896e-116
## Tmsb4x  3.127960e-106
## Prf1    3.216216e-105
## Actb     6.737733e-98
## Emb      2.884732e-82
FeaturePlot(merged_object, features = c("Gzma", "Gzmb", "Tmsb4x", "Prf1", "Actb", "Emb"), min.cutoff = 'q10', reduction = "umap")

Cluster 1

markers_c1 <- FindConservedMarkers(merged_object, ident.1 = 1, grouping.var = "orig.ident")
## Testing group Sample_4: (1) vs (2, 3, 0, 4, 5, 7)
## Testing group Sample_7: (1) vs (4, 3, 5, 2, 0, 7, 6)
## Testing group Sample_8: (1) vs (4, 0, 7, 5, 2, 9, 3, 6, 8)
## Testing group Sample_3: (1) vs (5, 2, 4, 3, 0, 7, 6, 8, 9)
## Testing group Sample_2: (1) vs (3, 2, 5, 0, 4, 6, 9, 7, 8)
## Testing group Sample_6: (1) vs (0, 4, 5, 2, 7, 3, 8, 6, 9)
## Testing group Sample_1: (1) vs (2, 0, 4, 3, 6, 5, 7, 8, 9)
head(markers_c1)
##        Sample_4_p_val Sample_4_avg_log2FC Sample_4_pct.1 Sample_4_pct.2
## Ccr2     1.477598e-06           1.4508060          0.829          0.500
## Ccl5     5.228492e-01          -0.5297467          1.000          0.929
## Gzmb     1.241403e-01          -1.1658451          0.634          0.643
## Cd7      1.154982e-05           1.0281320          0.927          0.488
## Rap1b    2.415908e-02          -0.5749798          0.976          0.857
## Ctla2a   4.300499e-04           0.8509587          0.732          0.488
##        Sample_4_p_val_adj Sample_7_p_val Sample_7_avg_log2FC Sample_7_pct.1
## Ccr2           0.02616235   1.196308e-08           1.2062496          0.877
## Ccl5           1.00000000   9.755418e-01          -0.4906165          0.892
## Gzmb           1.00000000   7.180655e-01          -1.1044222          0.369
## Cd7            0.20450120   2.706668e-02           0.2405162          0.908
## Rap1b          1.00000000   1.172166e-02          -0.4312940          0.846
## Ctla2a         1.00000000   2.046767e-01           0.2818947          0.723
##        Sample_7_pct.2 Sample_7_p_val_adj Sample_8_p_val Sample_8_avg_log2FC
## Ccr2            0.567       0.0002118184   2.415408e-21           1.3066011
## Ccl5            0.840       1.0000000000   5.815502e-08          -0.9817531
## Gzmb            0.381       1.0000000000   7.207646e-08          -1.8596237
## Cd7             0.758       1.0000000000   2.160264e-09           0.5157704
## Rap1b           0.902       1.0000000000   8.438946e-05          -0.5819957
## Ctla2a          0.716       1.0000000000   4.366782e-11           0.7641169
##        Sample_8_pct.1 Sample_8_pct.2 Sample_8_p_val_adj Sample_3_p_val
## Ccr2            0.885          0.560       4.276721e-17   4.161815e-26
## Ccl5            0.980          0.927       1.029693e-03   2.822960e-13
## Gzmb            0.547          0.682       1.276186e-03   1.194314e-07
## Cd7             0.899          0.652       3.824963e-05   4.549868e-14
## Rap1b           0.838          0.889       1.000000e+00   2.766807e-05
## Ctla2a          0.784          0.489       7.731824e-07   3.742153e-10
##        Sample_3_avg_log2FC Sample_3_pct.1 Sample_3_pct.2 Sample_3_p_val_adj
## Ccr2             1.2609879          0.842          0.525       7.368910e-22
## Ccl5            -0.7440240          0.986          0.980       4.998333e-09
## Gzmb            -1.4818090          0.637          0.720       2.114652e-03
## Cd7              0.5950440          0.833          0.558       8.055997e-10
## Rap1b           -0.4902986          0.842          0.887       4.898909e-01
## Ctla2a           0.7045460          0.726          0.510       6.625857e-06
##        Sample_2_p_val Sample_2_avg_log2FC Sample_2_pct.1 Sample_2_pct.2
## Ccr2     2.016472e-33           1.2684343          0.894          0.542
## Ccl5     8.951389e-15          -0.7258744          0.992          0.969
## Gzmb     1.177891e-08          -1.4104502          0.661          0.724
## Cd7      3.952732e-21           0.9196994          0.852          0.533
## Rap1b    4.920275e-06          -0.5265137          0.822          0.885
## Ctla2a   3.683701e-12           0.9311525          0.691          0.444
##        Sample_2_p_val_adj Sample_6_p_val Sample_6_avg_log2FC Sample_6_pct.1
## Ccr2         3.570365e-29   6.660988e-45           1.4913377          0.932
## Ccl5         1.584933e-10   3.047792e-11          -0.7074490          0.988
## Gzmb         2.085574e-04   1.528163e-10          -1.4262061          0.651
## Cd7          6.998708e-17   5.082192e-15           0.7004850          0.835
## Rap1b        8.711839e-02   1.709772e-12          -0.7099189          0.851
## Ctla2a       6.522361e-08   1.107877e-20           1.1238461          0.723
##        Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val Sample_1_avg_log2FC
## Ccr2            0.536       1.179395e-40   1.691825e-54           1.1744811
## Ccl5            0.964       5.396421e-07   4.382549e-33          -0.8984798
## Gzmb            0.725       2.705766e-06   2.146793e-25          -1.9151981
## Cd7             0.617       8.998530e-11   2.698521e-25           0.6564855
## Rap1b           0.914       3.027322e-08   9.486466e-21          -0.7140731
## Ctla2a          0.422       1.961606e-16   1.165175e-13           0.5815499
##        Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj     max_pval
## Ccr2            0.930          0.579       2.995545e-50 1.477598e-06
## Ccl5            0.987          0.983       7.759740e-29 9.755418e-01
## Gzmb            0.642          0.769       3.801112e-21 7.180655e-01
## Cd7             0.863          0.612       4.778001e-21 2.706668e-02
## Rap1b           0.784          0.897       1.679674e-16 2.415908e-02
## Ctla2a          0.725          0.515       2.063058e-09 2.046767e-01
##        minimump_p_val
## Ccr2     1.184277e-53
## Ccl5     3.067784e-32
## Gzmb     1.502755e-24
## Cd7      1.888964e-24
## Rap1b    6.640526e-20
## Ctla2a   7.755136e-20
FeaturePlot(merged_object, features = c("Ccr2", "Ccl5", "Gzmb", "Cd7", "Rap1b", "Ctla2a"), min.cutoff = 'q10', reduction = "umap")

Cluster 2

markers_c2 <- FindConservedMarkers(merged_object, ident.1 = 2, grouping.var = "orig.ident")
## Testing group Sample_4: (2) vs (1, 3, 0, 4, 5, 7)
## Testing group Sample_7: (2) vs (4, 3, 5, 1, 0, 7, 6)
## Testing group Sample_8: (2) vs (4, 0, 7, 5, 1, 9, 3, 6, 8)
## Testing group Sample_3: (2) vs (5, 1, 4, 3, 0, 7, 6, 8, 9)
## Testing group Sample_2: (2) vs (1, 3, 5, 0, 4, 6, 9, 7, 8)
## Testing group Sample_6: (2) vs (0, 4, 1, 5, 7, 3, 8, 6, 9)
## Testing group Sample_1: (2) vs (1, 0, 4, 3, 6, 5, 7, 8, 9)
head(markers_c2)
##        Sample_4_p_val Sample_4_avg_log2FC Sample_4_pct.1 Sample_4_pct.2
## Tmsb4x   2.126575e-03          -0.9393720          1.000          1.000
## Pmepa1   8.497064e-05           2.3574565          0.727          0.246
## Cfl1     1.283277e-01          -0.4920707          1.000          1.000
## Actb     6.877714e-04          -0.9738109          1.000          1.000
## Ncr1     6.609110e-03          -1.9666518          0.273          0.719
## Pfn1     1.882067e-04          -1.4659598          0.818          0.982
##        Sample_4_p_val_adj Sample_7_p_val Sample_7_avg_log2FC Sample_7_pct.1
## Tmsb4x                  1    0.006305328          -0.5618892          1.000
## Pmepa1                  1    0.001317577           1.5985097          0.706
## Cfl1                    1    0.005751684          -0.5873940          1.000
## Actb                    1    0.009030797          -0.5576387          1.000
## Ncr1                    1    0.113479711          -0.8403411          0.529
## Pfn1                    1    0.127088695          -0.3465550          0.941
##        Sample_7_pct.2 Sample_7_p_val_adj Sample_8_p_val Sample_8_avg_log2FC
## Tmsb4x          1.000                  1   8.965747e-23          -0.9892678
## Pmepa1          0.306                  1   1.728834e-19           1.9303840
## Cfl1            0.992                  1   7.279888e-18          -0.9227658
## Actb            1.000                  1   1.125718e-15          -0.8355359
## Ncr1            0.674                  1   4.114623e-14          -1.7499795
## Pfn1            0.975                  1   1.508448e-10          -0.7139207
##        Sample_8_pct.1 Sample_8_pct.2 Sample_8_p_val_adj Sample_3_p_val
## Tmsb4x          1.000          1.000       1.587475e-18   1.528841e-32
## Pmepa1          0.723          0.310       3.061074e-15   2.465588e-39
## Cfl1            0.957          1.000       1.288977e-13   3.698814e-31
## Actb            0.989          1.000       1.993196e-11   1.020776e-31
## Ncr1            0.404          0.759       7.285351e-10   6.065192e-32
## Pfn1            0.915          0.973       2.670857e-06   1.286989e-24
##        Sample_3_avg_log2FC Sample_3_pct.1 Sample_3_pct.2 Sample_3_p_val_adj
## Tmsb4x          -0.9714757          1.000          1.000       2.706966e-28
## Pmepa1           1.9931870          0.816          0.330       4.365570e-35
## Cfl1            -1.0118830          0.966          0.994       6.549121e-27
## Actb            -0.9954313          1.000          1.000       1.807386e-27
## Ncr1            -2.7161662          0.230          0.702       1.073903e-27
## Pfn1            -0.9622337          0.856          0.974       2.278743e-20
##        Sample_2_p_val Sample_2_avg_log2FC Sample_2_pct.1 Sample_2_pct.2
## Tmsb4x   4.674011e-42           -1.044405          1.000          1.000
## Pmepa1   8.950016e-24            1.771852          0.646          0.310
## Cfl1     1.572640e-34           -1.000624          0.979          0.991
## Actb     5.979804e-40           -1.073383          0.990          1.000
## Ncr1     4.307079e-29           -2.244020          0.302          0.710
## Pfn1     3.396034e-32           -1.120258          0.854          0.968
##        Sample_2_p_val_adj Sample_6_p_val Sample_6_avg_log2FC Sample_6_pct.1
## Tmsb4x       8.275803e-38   1.146018e-43          -1.1447200          1.000
## Pmepa1       1.584690e-19   8.081303e-47           2.4725710          0.813
## Cfl1         2.784516e-30   7.232115e-28          -0.9485548          0.986
## Actb         1.058784e-35   3.263250e-24          -0.8642225          1.000
## Ncr1         7.626115e-25   8.888288e-31          -2.2948923          0.309
## Pfn1         6.013017e-28   2.531783e-29          -1.1544189          0.856
##        Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val Sample_1_avg_log2FC
## Tmsb4x          1.000       2.029139e-39   3.938290e-66          -0.9270128
## Pmepa1          0.255       1.430876e-42   2.028102e-62           2.0005571
## Cfl1            0.999       1.280518e-23   2.731259e-56          -0.9229065
## Actb            1.000       5.777911e-20   2.043755e-52          -0.8740888
## Ncr1            0.796       1.573760e-26   1.382370e-48          -1.9954968
## Pfn1            0.965       4.482774e-25   3.494870e-44          -0.9219084
##        Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj    max_pval
## Tmsb4x          1.000          1.000       6.973136e-62 0.006305328
## Pmepa1          0.729          0.267       3.590958e-58 0.001317577
## Cfl1            0.990          0.998       4.835968e-52 0.128327695
## Actb            1.000          1.000       3.618673e-48 0.009030797
## Ncr1            0.386          0.767       2.447624e-44 0.113479711
## Pfn1            0.901          0.975       6.188017e-40 0.127088695
##        minimump_p_val
## Tmsb4x   2.756803e-65
## Pmepa1   1.419672e-61
## Cfl1     1.911882e-55
## Actb     1.430629e-51
## Ncr1     9.676590e-48
## Pfn1     2.446409e-43
FeaturePlot(merged_object, features = c("Tmsb4x", "Pmepa1", "Cfl1", "Actb", "Ncr1", "Pfn1"), min.cutoff = 'q10', reduction = "umap")

Cluster 3

markers_c3 <- FindConservedMarkers(merged_object, ident.1 = 3, grouping.var = "orig.ident")
## Testing group Sample_4: (3) vs (2, 1, 0, 4, 5, 7)
## Testing group Sample_7: (3) vs (4, 5, 2, 1, 0, 7, 6)
## Testing group Sample_8: (3) vs (4, 0, 7, 5, 2, 1, 9, 6, 8)
## Testing group Sample_3: (3) vs (5, 1, 2, 4, 0, 7, 6, 8, 9)
## Testing group Sample_2: (3) vs (1, 2, 5, 0, 4, 6, 9, 7, 8)
## Testing group Sample_6: (3) vs (0, 4, 1, 5, 2, 7, 8, 6, 9)
## Testing group Sample_1: (3) vs (1, 2, 0, 4, 6, 5, 7, 8, 9)
head(markers_c3)
##        Sample_4_p_val Sample_4_avg_log2FC Sample_4_pct.1 Sample_4_pct.2
## Nr4a1    1.075620e-03            2.791555          1.000          0.661
## Pim1     6.061426e-03            1.168955          1.000          0.907
## Nr4a3    6.704097e-03            1.626052          1.000          0.534
## Nfkbiz   4.364376e-02            1.699575          0.857          0.568
## Nfkbid   7.821487e-05            3.211632          0.857          0.297
## Nfkbia   2.269208e-03            1.877935          1.000          0.754
##        Sample_4_p_val_adj Sample_7_p_val Sample_7_avg_log2FC Sample_7_pct.1
## Nr4a1                   1   0.0121817370           1.3701563            0.9
## Pim1                    1   0.0003269975           1.2556559            1.0
## Nr4a3                   1   0.0518880072           0.9726873            0.7
## Nfkbiz                  1   0.0057749661           0.9913053            0.9
## Nfkbid                  1   0.0190860168           1.4801381            0.7
## Nfkbia                  1   0.0046363657           1.0017431            1.0
##        Sample_7_pct.2 Sample_7_p_val_adj Sample_8_p_val Sample_8_avg_log2FC
## Nr4a1           0.639                  1   3.672879e-26            2.435278
## Pim1            0.956                  1   1.131362e-14            1.229889
## Nr4a3           0.458                  1   2.989974e-10            1.448916
## Nfkbiz          0.671                  1   9.308188e-11            1.383970
## Nfkbid          0.418                  1   6.377328e-19            2.148724
## Nfkbia          0.843                  1   1.401214e-12            1.179727
##        Sample_8_pct.1 Sample_8_pct.2 Sample_8_p_val_adj Sample_3_p_val
## Nr4a1           1.000          0.641       6.503199e-22   4.764986e-31
## Pim1            1.000          0.927       2.003190e-10   5.732263e-20
## Nr4a3           0.815          0.454       5.294047e-06   4.295224e-15
## Nfkbiz          0.907          0.633       1.648108e-06   1.560632e-18
## Nfkbid          0.852          0.349       1.129170e-14   7.920557e-14
## Nfkbia          1.000          0.854       2.480990e-08   6.257187e-17
##        Sample_3_avg_log2FC Sample_3_pct.1 Sample_3_pct.2 Sample_3_p_val_adj
## Nr4a1             2.260595          0.987          0.626       8.436884e-27
## Pim1              1.151868          1.000          0.942       1.014954e-15
## Nr4a3             1.677667          0.855          0.475       7.605124e-11
## Nfkbiz            1.455207          0.961          0.668       2.763256e-14
## Nfkbid            1.642791          0.816          0.394       1.402414e-09
## Nfkbia            1.154027          1.000          0.843       1.107897e-12
##        Sample_2_p_val Sample_2_avg_log2FC Sample_2_pct.1 Sample_2_pct.2
## Nr4a1    1.838418e-34           2.3155490          0.976          0.595
## Pim1     3.136641e-16           0.8945974          1.000          0.951
## Nr4a3    1.403351e-13           1.1978058          0.833          0.465
## Nfkbiz   1.658155e-13           1.1750470          0.905          0.632
## Nfkbid   1.189584e-12           1.5745423          0.726          0.372
## Nfkbia   2.059349e-21           1.1855633          1.000          0.838
##        Sample_2_p_val_adj Sample_6_p_val Sample_6_avg_log2FC Sample_6_pct.1
## Nr4a1        3.255103e-30   9.956319e-35           2.1426219          0.989
## Pim1         5.553737e-12   1.115536e-26           1.1996934          1.000
## Nr4a3        2.484773e-09   3.410772e-22           1.8185824          0.860
## Nfkbiz       2.935929e-09   3.901547e-20           1.3653676          0.957
## Nfkbid       2.106277e-08   6.063482e-25           1.8654458          0.892
## Nfkbia       3.646283e-17   1.019137e-18           0.9790313          0.989
##        Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val Sample_1_avg_log2FC
## Nr4a1           0.656       1.762866e-30   1.158663e-83           2.1325037
## Pim1            0.946       1.975167e-22   2.276500e-55           1.1295109
## Nr4a3           0.443       6.039113e-18   2.045120e-47           1.5872597
## Nfkbiz          0.709       6.908079e-16   7.640821e-47           1.3556321
## Nfkbid          0.474       1.073600e-20   9.876995e-43           1.6775330
## Nfkbia          0.882       1.804483e-14   1.861125e-39           0.9540879
##        Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj    max_pval
## Nr4a1           0.995          0.677       2.051528e-79 0.012181737
## Pim1            1.000          0.964       4.030771e-51 0.006061426
## Nr4a3           0.894          0.460       3.621089e-43 0.051888007
## Nfkbiz          0.959          0.698       1.352884e-42 0.043643757
## Nfkbid          0.862          0.503       1.748821e-38 0.019086017
## Nfkbia          1.000          0.873       3.295307e-35 0.004636366
##        minimump_p_val
## Nr4a1    8.110638e-83
## Pim1     1.593550e-54
## Nr4a3    1.431584e-46
## Nfkbiz   5.348575e-46
## Nfkbid   6.913896e-42
## Nfkbia   1.302787e-38
FeaturePlot(merged_object, features = c("Nr4a1", "Pim1", "Nr4a3", "Nfkbiz", "Nfkbid", "Nfkbia"), min.cutoff = 'q10', reduction = "umap")

Cluster 4

markers_c4 <- FindConservedMarkers(merged_object, ident.1 = 4, grouping.var = "orig.ident")
## Testing group Sample_4: (4) vs (2, 1, 3, 0, 5, 7)
## Testing group Sample_7: (4) vs (3, 5, 2, 1, 0, 7, 6)
## Testing group Sample_8: (4) vs (0, 7, 5, 2, 1, 9, 3, 6, 8)
## Testing group Sample_3: (4) vs (5, 1, 2, 3, 0, 7, 6, 8, 9)
## Testing group Sample_2: (4) vs (1, 3, 2, 5, 0, 6, 9, 7, 8)
## Testing group Sample_6: (4) vs (0, 1, 5, 2, 7, 3, 8, 6, 9)
## Testing group Sample_1: (4) vs (1, 2, 0, 3, 6, 5, 7, 8, 9)
head(markers_c4)
##          Sample_4_p_val Sample_4_avg_log2FC Sample_4_pct.1 Sample_4_pct.2
## Tmem176b   2.497853e-13            7.200362          0.524          0.010
## Tmem176a   5.959621e-12            6.246949          0.476          0.010
## Cxcr6      4.221679e-05            1.457118          0.429          0.077
## Il7r       9.169272e-12            3.162340          0.857          0.173
## Ckb        5.450622e-05            3.871379          0.238          0.019
## Krt83      1.070233e-03            2.922802          0.190          0.019
##          Sample_4_p_val_adj Sample_7_p_val Sample_7_avg_log2FC Sample_7_pct.1
## Tmem176b       4.422698e-09   3.420587e-27           4.0630651          0.675
## Tmem176a       1.055211e-07   1.538395e-21           3.2830784          0.584
## Cxcr6          7.474906e-01   3.956218e-15           1.6618579          0.779
## Il7r           1.623511e-07   1.542046e-07           0.8929757          0.844
## Ckb            9.650871e-01   4.314771e-14           3.6656390          0.390
## Krt83          1.000000e+00   2.563678e-14           5.7550200          0.312
##          Sample_7_pct.2 Sample_7_p_val_adj Sample_8_p_val Sample_8_avg_log2FC
## Tmem176b          0.055       6.056491e-23   7.316469e-80            6.759692
## Tmem176a          0.049       2.723883e-17   8.403114e-64            6.864799
## Cxcr6             0.236       7.004879e-11   6.906206e-40            3.522176
## Il7r              0.440       2.730346e-03   1.580684e-35            3.058017
## Ckb               0.033       7.639734e-10   2.262701e-40            5.147719
## Krt83             0.005       4.539249e-10   1.796440e-31            6.181299
##          Sample_8_pct.1 Sample_8_pct.2 Sample_8_p_val_adj Sample_3_p_val
## Tmem176b          0.704          0.015       1.295454e-75   1.339436e-80
## Tmem176a          0.568          0.011       1.487855e-59   5.186672e-68
## Cxcr6             0.593          0.064       1.222813e-35   2.201368e-34
## Il7r              0.716          0.135       2.798759e-31   2.233786e-38
## Ckb               0.420          0.017       4.006338e-36   2.499386e-24
## Krt83             0.296          0.007       3.180776e-27   4.419033e-26
##          Sample_3_avg_log2FC Sample_3_pct.1 Sample_3_pct.2 Sample_3_p_val_adj
## Tmem176b            6.669095          0.554          0.008       2.371605e-76
## Tmem176a            6.141527          0.482          0.008       9.183522e-64
## Cxcr6               4.151166          0.393          0.025       3.897742e-30
## Il7r                3.354353          0.661          0.083       3.955142e-34
## Ckb                 5.246590          0.214          0.008       4.425412e-20
## Krt83               5.842426          0.179          0.003       7.824339e-22
##          Sample_2_p_val Sample_2_avg_log2FC Sample_2_pct.1 Sample_2_pct.2
## Tmem176b   2.309161e-60            5.413552          0.476          0.016
## Tmem176a   1.170699e-74            8.452846          0.429          0.002
## Cxcr6      1.278999e-59            4.611491          0.492          0.018
## Il7r       3.577088e-48            3.811770          0.667          0.071
## Ckb        1.043803e-42            5.097074          0.317          0.009
## Krt83      1.334703e-37            6.214594          0.254          0.005
##          Sample_2_p_val_adj Sample_6_p_val Sample_6_avg_log2FC Sample_6_pct.1
## Tmem176b       4.088600e-56   1.606858e-93            6.826222          0.585
## Tmem176a       2.072839e-70   2.791078e-89            7.686168          0.512
## Cxcr6          2.264595e-55   2.069874e-61            4.440349          0.561
## Il7r           6.333593e-44   3.923625e-65            3.023299          0.829
## Ckb            1.848158e-38   1.228709e-57            5.000168          0.439
## Krt83          2.363225e-33   2.538458e-43            6.252155          0.268
##          Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val Sample_1_avg_log2FC
## Tmem176b          0.010       2.845103e-89  6.103439e-124            5.659137
## Tmem176a          0.004       4.941883e-85  3.481817e-107            5.801093
## Cxcr6             0.035       3.664919e-57   1.255234e-94            4.741620
## Il7r              0.095       6.947171e-61   8.459578e-86            3.694017
## Ckb               0.016       2.175552e-53   1.175041e-81            5.588630
## Krt83             0.004       4.494594e-39   2.149519e-80            6.794259
##          Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj     max_pval
## Tmem176b          0.557          0.015      1.080675e-119 2.497853e-13
## Tmem176a          0.456          0.010      6.164904e-103 5.959621e-12
## Cxcr6             0.519          0.021       2.222516e-90 4.221679e-05
## Il7r              0.785          0.079       1.497853e-81 1.542046e-07
## Ckb               0.367          0.009       2.080528e-77 5.450622e-05
## Krt83             0.291          0.003       3.805938e-76 1.070233e-03
##          minimump_p_val
## Tmem176b  4.272407e-123
## Tmem176a  2.437272e-106
## Cxcr6      8.786635e-94
## Il7r       5.921705e-85
## Ckb        8.225290e-81
## Krt83      1.504663e-79
FeaturePlot(merged_object, features = c("Tmem176b", "Tmem176a", "Cxcr6", "Il7r", "Ckb", "Krt83"), min.cutoff = 'q10', reduction = "umap")

Cluster 5

markers_c5 <- FindConservedMarkers(merged_object, ident.1 = 5, grouping.var = "orig.ident")
## Testing group Sample_4: (5) vs (2, 1, 3, 0, 4, 7)
## Testing group Sample_7: (5) vs (4, 3, 2, 1, 0, 7, 6)
## Testing group Sample_8: (5) vs (4, 0, 7, 2, 1, 9, 3, 6, 8)
## Testing group Sample_3: (5) vs (1, 2, 4, 3, 0, 7, 6, 8, 9)
## Testing group Sample_2: (5) vs (1, 3, 2, 0, 4, 6, 9, 7, 8)
## Testing group Sample_6: (5) vs (0, 4, 1, 2, 7, 3, 8, 6, 9)
## Testing group Sample_1: (5) vs (1, 2, 0, 4, 3, 6, 7, 8, 9)
head(markers_c5)
##       Sample_4_p_val Sample_4_avg_log2FC Sample_4_pct.1 Sample_4_pct.2
## Cd3e    3.548072e-15            3.190758            0.9          0.043
## Cd3g    2.133633e-12            2.743428            0.9          0.070
## Trgc2   2.027462e-05            2.580021            0.3          0.017
## Cd3d    2.075574e-15            3.846109            1.0          0.070
## Trgc1   1.247819e-05            5.272598            0.3          0.017
## Dpp4    7.393485e-12            7.194596            0.4          0.000
##       Sample_4_p_val_adj Sample_7_p_val Sample_7_avg_log2FC Sample_7_pct.1
## Cd3e        6.282216e-11   7.781409e-41            4.141041          0.929
## Cd3g        3.777811e-08   8.361552e-30            2.809674          0.875
## Trgc2       3.589824e-01   1.552416e-21            8.458705          0.411
## Cd3d        3.675012e-11   1.227549e-40            4.167588          0.911
## Trgc1       2.209388e-01   2.273059e-22            6.895311          0.464
## Dpp4        1.309091e-07   6.067777e-09            4.448364          0.250
##       Sample_7_pct.2 Sample_7_p_val_adj Sample_8_p_val Sample_8_avg_log2FC
## Cd3e           0.049       1.377776e-36  9.727149e-110            5.352490
## Cd3g           0.103       1.480496e-25   3.136613e-69            4.986241
## Trgc2          0.000       2.748708e-17   9.517830e-43            5.693060
## Cd3d           0.049       2.173498e-36   3.757123e-76            4.870504
## Trgc1          0.010       4.024678e-18   7.664631e-52            7.235920
## Dpp4           0.025       1.074361e-04   6.037741e-22            3.212364
##       Sample_8_pct.1 Sample_8_pct.2 Sample_8_p_val_adj Sample_3_p_val
## Cd3e           0.966          0.016      1.722289e-105  9.169223e-133
## Cd3g           0.780          0.038       5.553687e-65   2.200579e-77
## Trgc2          0.441          0.014       1.685227e-38   2.503943e-52
## Cd3d           0.864          0.047       6.652363e-72   1.555333e-91
## Trgc1          0.475          0.009       1.357100e-47   5.059710e-50
## Dpp4           0.339          0.029       1.069042e-17   3.516940e-20
##       Sample_3_avg_log2FC Sample_3_pct.1 Sample_3_pct.2 Sample_3_p_val_adj
## Cd3e             5.861757          0.912          0.014      1.623503e-128
## Cd3g             5.167211          0.702          0.029       3.896346e-73
## Trgc2           10.748960          0.298          0.000       4.433482e-48
## Cd3d             5.760607          0.737          0.022       2.753872e-87
## Trgc1            7.703775          0.333          0.004       8.958722e-46
## Dpp4             5.340003          0.175          0.007       6.227094e-16
##       Sample_2_p_val Sample_2_avg_log2FC Sample_2_pct.1 Sample_2_pct.2
## Cd3e   7.238403e-139            6.297918          0.918          0.014
## Cd3g    2.767216e-80            4.740166          0.673          0.022
## Trgc2   1.327865e-55            7.616575          0.347          0.004
## Cd3d    2.569800e-73            5.095550          0.714          0.035
## Trgc1   3.605875e-44            5.727967          0.327          0.007
## Dpp4    6.178949e-36            6.329157          0.265          0.006
##       Sample_2_p_val_adj Sample_6_p_val Sample_6_avg_log2FC Sample_6_pct.1
## Cd3e       1.281632e-134  9.692243e-146            6.688313           0.92
## Cd3g        4.899632e-76   1.491914e-85            4.363994           0.78
## Trgc2       2.351118e-51   7.361676e-59            6.342774           0.44
## Cd3d        4.550088e-69   1.109206e-84            5.003268           0.76
## Trgc1       6.384562e-40   1.313098e-58            6.089648           0.44
## Dpp4        1.094045e-31   2.332838e-28            4.740599           0.26
##       Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val Sample_1_avg_log2FC
## Cd3e           0.012      1.716109e-141  6.817531e-289            6.549859
## Cd3g           0.032       2.641582e-81  3.129003e-224            5.937174
## Trgc2          0.011       1.303458e-54  2.973040e-146            7.038254
## Cd3d           0.031       1.963960e-80  1.434006e-145            4.977010
## Trgc1          0.011       2.324972e-54  7.599189e-117            6.997875
## Dpp4           0.011       4.130523e-24   1.561138e-94            5.420075
##       Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj     max_pval
## Cd3e           0.981          0.011      1.207112e-284 3.548072e-15
## Cd3g           0.864          0.019      5.540213e-220 2.133633e-12
## Trgc2          0.505          0.005      5.264065e-142 2.027462e-05
## Cd3d           0.738          0.035      2.539051e-141 2.075574e-15
## Trgc1          0.437          0.007      1.345512e-112 1.247819e-05
## Dpp4           0.379          0.008       2.764151e-90 6.067777e-09
##       minimump_p_val
## Cd3e   4.772271e-288
## Cd3g   2.190302e-223
## Trgc2  2.081128e-145
## Cd3d   1.003804e-144
## Trgc1  5.319432e-116
## Dpp4    1.092797e-93
FeaturePlot(merged_object, features = c("Cd3e", "Cd3g", "Trgc2", "Cd3d", "Trgc1", "Dpp4"), min.cutoff = 'q10', reduction = "umap")

Cluster 6

markers_c6 <- FindConservedMarkers(merged_object, ident.1 = 6, grouping.var = "orig.ident")
## Warning: Identity: 6 not present in group Sample_4. Skipping Sample_4
## Warning: Sample_7 has fewer than 3 cells in Identity: 6. Skipping Sample_7
## Testing group Sample_8: (6) vs (4, 0, 7, 5, 2, 1, 9, 3, 8)
## Testing group Sample_3: (6) vs (5, 1, 2, 4, 3, 0, 7, 8, 9)
## Testing group Sample_2: (6) vs (1, 3, 2, 5, 0, 4, 9, 7, 8)
## Testing group Sample_6: (6) vs (0, 4, 1, 5, 2, 7, 3, 8, 9)
## Testing group Sample_1: (6) vs (1, 2, 0, 4, 3, 5, 7, 8, 9)
head(markers_c6)
##        Sample_8_p_val Sample_8_avg_log2FC Sample_8_pct.1 Sample_8_pct.2
## Oas3     6.830673e-17           4.9252370          0.417          0.020
## Mx1      8.286387e-18           4.5915557          0.500          0.028
## Rsad2    7.110979e-04           2.4915473          0.167          0.020
## Ifit1    3.452357e-21           4.3184725          0.500          0.022
## Slfn5    4.835250e-01           0.6795144          0.083          0.041
## Ifi204   1.802980e-11           3.8519952          0.333          0.020
##        Sample_8_p_val_adj Sample_3_p_val Sample_3_avg_log2FC Sample_3_pct.1
## Oas3         1.209439e-12   3.018117e-68            6.911300          0.667
## Mx1          1.467188e-13   6.064391e-32            5.534447          0.583
## Rsad2        1.000000e+00   1.327859e-14            5.585285          0.333
## Ifit1        6.112742e-17   6.750795e-09            3.557636          0.250
## Slfn5        1.000000e+00   1.355641e-16            5.009240          0.417
## Ifi204       3.192356e-07   5.244589e-42            5.652052          0.750
##        Sample_3_pct.2 Sample_3_p_val_adj Sample_2_p_val Sample_2_avg_log2FC
## Oas3            0.007       5.343878e-64   1.188691e-08            5.081359
## Mx1             0.020       1.073761e-27   1.672608e-22            5.315476
## Rsad2           0.016       2.351108e-10   4.195103e-56            6.316829
## Ifit1           0.016       1.195296e-04   8.426429e-07            4.291554
## Slfn5           0.022       2.400298e-12   8.867843e-04            4.615357
## Ifi204          0.025       9.286068e-38   1.355291e-30            5.122179
##        Sample_2_pct.1 Sample_2_pct.2 Sample_2_p_val_adj Sample_6_p_val
## Oas3              0.2          0.009       2.104696e-04   9.328074e-07
## Mx1               0.5          0.019       2.961519e-18   1.044571e-23
## Rsad2             0.7          0.011       7.427849e-52   5.320054e-14
## Ifit1             0.2          0.013       1.491983e-02   1.349365e-44
## Slfn5             0.2          0.026       1.000000e+00   1.908852e-10
## Ifi204            0.6          0.019       2.399678e-26   6.849094e-18
##        Sample_6_avg_log2FC Sample_6_pct.1 Sample_6_pct.2 Sample_6_p_val_adj
## Oas3              4.254094            0.2          0.013       1.651629e-02
## Mx1               4.533246            0.5          0.018       1.849517e-19
## Rsad2             6.417870            0.2          0.005       9.419687e-10
## Ifit1             6.333328            0.5          0.007       2.389186e-40
## Slfn5             4.377011            0.3          0.017       3.379813e-06
## Ifi204            4.303219            0.4          0.016       1.212701e-13
##        Sample_1_p_val Sample_1_avg_log2FC Sample_1_pct.1 Sample_1_pct.2
## Oas3     1.610099e-50            4.636630          0.465          0.026
## Mx1      1.038707e-56            4.700852          0.535          0.032
## Rsad2    3.082650e-09            3.401074          0.140          0.015
## Ifit1    1.900743e-24            3.556260          0.279          0.021
## Slfn5    6.054133e-43            4.166827          0.395          0.022
## Ifi204   4.002328e-40            4.620068          0.349          0.018
##        Sample_1_p_val_adj     max_pval minimump_p_val
## Oas3         2.850842e-46 9.328074e-07   1.509059e-67
## Mx1          1.839134e-52 8.286387e-18   5.193534e-56
## Rsad2        5.458139e-05 7.110979e-04   2.097551e-55
## Ifit1        3.365456e-20 8.426429e-07   6.746826e-44
## Slfn5        1.071945e-38 4.835250e-01   3.027067e-42
## Ifi204       7.086523e-36 1.802980e-11   2.622294e-41
FeaturePlot(merged_object, features = c("Oas3", "Mx1", "Rsad2", "Ifit1", "Slfn5", "Ifi204"), min.cutoff = 'q10', reduction = "umap")

Cluster 7

markers_c7 <- FindConservedMarkers(merged_object, ident.1 = 7, grouping.var = "orig.ident")
## Testing group Sample_4: (7) vs (2, 1, 3, 0, 4, 5)
## Testing group Sample_7: (7) vs (4, 3, 5, 2, 1, 0, 6)
## Testing group Sample_8: (7) vs (4, 0, 5, 2, 1, 9, 3, 6, 8)
## Testing group Sample_3: (7) vs (5, 1, 2, 4, 3, 0, 6, 8, 9)
## Testing group Sample_2: (7) vs (1, 3, 2, 5, 0, 4, 6, 9, 8)
## Testing group Sample_6: (7) vs (0, 4, 1, 5, 2, 3, 8, 6, 9)
## Testing group Sample_1: (7) vs (1, 2, 0, 4, 3, 6, 5, 8, 9)
head(markers_c7)
##         Sample_4_p_val Sample_4_avg_log2FC Sample_4_pct.1 Sample_4_pct.2
## Cd8b1     7.384557e-19            9.099437            0.8          0.008
## Cd8a      4.316244e-23           10.623875            0.8          0.000
## Themis    1.072380e-28            9.722424            1.0          0.000
## Pdcd1     1.988629e-15            5.137715            0.8          0.017
## Fam169b   8.688751e-16            6.134311            0.8          0.017
## Havcr2    1.462949e-17            7.612434            0.6          0.000
##         Sample_4_p_val_adj Sample_7_p_val Sample_7_avg_log2FC Sample_7_pct.1
## Cd8b1         1.307510e-14   3.093571e-30            8.747824          0.714
## Cd8a          7.642341e-19   9.015139e-24            7.190432          0.714
## Themis        1.898756e-24   1.182509e-34            7.098802          0.714
## Pdcd1         3.521067e-11   3.616340e-11            5.067604          0.429
## Fam169b       1.538430e-11   1.506891e-34            6.698552          0.714
## Havcr2        2.590298e-13   6.318577e-12            7.861271          0.286
##         Sample_7_pct.2 Sample_7_p_val_adj Sample_8_p_val Sample_8_avg_log2FC
## Cd8b1            0.008       5.477477e-26   3.006635e-61            7.929075
## Cd8a             0.016       1.596220e-19   4.491718e-64            7.762283
## Themis           0.004       2.093751e-30   2.601179e-87            8.421810
## Pdcd1            0.016       6.403092e-07   5.239091e-33            5.887830
## Fam169b          0.004       2.668102e-30   8.933740e-18            4.558485
## Havcr2           0.004       1.118767e-07   6.350868e-05            5.205317
##         Sample_8_pct.1 Sample_8_pct.2 Sample_8_p_val_adj Sample_3_p_val
## Cd8b1            0.714          0.005       5.323548e-57  4.438016e-150
## Cd8a             0.857          0.008       7.953036e-60   7.587654e-83
## Themis           0.857          0.003       4.605647e-83   2.293814e-53
## Pdcd1            0.714          0.016       9.276335e-29   1.380913e-96
## Fam169b          0.429          0.011       1.581808e-13   3.526183e-25
## Havcr2           0.143          0.007       1.000000e+00   3.403776e-37
##         Sample_3_avg_log2FC Sample_3_pct.1 Sample_3_pct.2 Sample_3_p_val_adj
## Cd8b1             10.047035            1.0          0.001      7.857951e-146
## Cd8a               7.923414            1.0          0.007       1.343470e-78
## Themis             8.262980            0.8          0.009       4.061428e-49
## Pdcd1              8.636668            0.8          0.002       2.445045e-92
## Fam169b            7.754720            0.4          0.005       6.243459e-21
## Havcr2             7.923650            0.6          0.007       6.026726e-33
##         Sample_2_p_val Sample_2_avg_log2FC Sample_2_pct.1 Sample_2_pct.2
## Cd8b1    1.699719e-124           11.151140            0.8          0.001
## Cd8a      2.259005e-63            9.097452            0.8          0.007
## Themis   3.234194e-124            9.805786            0.8          0.001
## Pdcd1     5.559121e-89            7.410570            0.8          0.003
## Fam169b  1.963940e-103            7.929846            0.8          0.002
## Havcr2    3.050592e-23            6.884515            0.4          0.006
##         Sample_2_p_val_adj Sample_6_p_val Sample_6_avg_log2FC Sample_6_pct.1
## Cd8b1        3.009522e-120  3.305878e-140           10.675293            1.0
## Cd8a          3.999794e-59  9.841570e-110            9.914961            1.0
## Themis       5.726464e-120   1.634437e-49            7.779124            0.8
## Pdcd1         9.842980e-85   1.292792e-57            7.058317            0.8
## Fam169b       3.477351e-99   6.488996e-51            6.978928            0.6
## Havcr2        5.401378e-19   7.059931e-90            7.896481            0.8
##         Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val Sample_1_avg_log2FC
## Cd8b1            0.002      5.853388e-136  7.195229e-262           11.381910
## Cd8a             0.005      1.742548e-105  1.393163e-185            9.100785
## Themis           0.010       2.893934e-45   2.361902e-82            6.702105
## Pdcd1            0.008       2.289018e-53  1.017534e-103            7.332221
## Fam169b          0.005       1.148942e-46   8.955323e-14            4.909992
## Havcr2           0.003       1.250031e-85   6.149526e-30            6.878676
##         Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj     max_pval
## Cd8b1            1.000          0.002      1.273987e-257 7.384557e-19
## Cd8a             1.000          0.005      2.466735e-181 4.316244e-23
## Themis           0.667          0.007       4.181984e-78 1.072380e-28
## Pdcd1            0.444          0.001       1.801645e-99 3.616340e-11
## Fam169b          0.222          0.006       1.585630e-09 8.955323e-14
## Havcr2           0.333          0.006       1.088835e-25 6.350868e-05
##         minimump_p_val
## Cd8b1    5.036660e-261
## Cd8a     9.752142e-185
## Themis   2.263936e-123
## Pdcd1    7.122735e-103
## Fam169b  1.374758e-102
## Havcr2    4.941951e-89
FeaturePlot(merged_object, features = c("Cd8b1", "Cd8a", "Themis", "Pdcd1", "Fam169b", "Havcr2"), min.cutoff = 'q10', reduction = "umap")

Cluster 8

markers_c8 <- FindConservedMarkers(merged_object, ident.1 = 8, grouping.var = "orig.ident")
## Warning: Identity: 8 not present in group Sample_4. Skipping Sample_4
## Warning: Identity: 8 not present in group Sample_7. Skipping Sample_7
## Testing group Sample_8: (8) vs (4, 0, 7, 5, 2, 1, 9, 3, 6)
## Testing group Sample_3: (8) vs (5, 1, 2, 4, 3, 0, 7, 6, 9)
## Warning: Sample_2 has fewer than 3 cells in Identity: 8. Skipping Sample_2
## Testing group Sample_6: (8) vs (0, 4, 1, 5, 2, 7, 3, 6, 9)
## Testing group Sample_1: (8) vs (1, 2, 0, 4, 3, 6, 5, 7, 9)
head(markers_c8)
##       Sample_8_p_val Sample_8_avg_log2FC Sample_8_pct.1 Sample_8_pct.2
## Pclaf  6.210833e-102            8.769795          1.000          0.002
## E2f1    3.394789e-37            7.660554          0.667          0.005
## Kntc1   3.792734e-16            5.741189          0.333          0.003
## Nuf2    7.161577e-91            9.267855          0.667          0.000
## Spc24   7.161577e-91            9.720665          0.667          0.000
## Cdc6    1.036754e-81            7.851885          1.000          0.003
##       Sample_8_p_val_adj Sample_3_p_val Sample_3_avg_log2FC Sample_3_pct.1
## Pclaf       1.099690e-97   7.406302e-81           10.665802          0.667
## E2f1        6.010813e-33   1.663237e-67            6.477685          1.000
## Kntc1       6.715414e-12   4.302516e-16            5.452783          0.333
## Nuf2        1.268029e-86   1.184810e-40            6.564405          0.667
## Spc24       1.268029e-86   2.664260e-21            7.475725          0.333
## Cdc6        1.835676e-77   3.035532e-31            8.660000          0.333
##       Sample_3_pct.2 Sample_3_p_val_adj Sample_6_p_val Sample_6_avg_log2FC
## Pclaf          0.001       1.311360e-76   1.343420e-33            8.091555
## E2f1           0.006       2.944927e-63   6.546889e-36            6.724959
## Kntc1          0.004       7.618034e-12   3.057158e-42            6.253711
## Nuf2           0.005       2.097824e-36   3.132765e-09            5.314474
## Spc24          0.002       4.717339e-17   2.943684e-27            5.193999
## Cdc6           0.001       5.374713e-27   8.514606e-74            8.109040
##       Sample_6_pct.1 Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val
## Pclaf          0.500          0.007       2.378659e-29   1.196654e-67
## E2f1           0.667          0.012       1.159192e-31   6.911734e-95
## Kntc1          0.500          0.005       5.413004e-38   2.994900e-93
## Nuf2           0.167          0.003       5.546874e-05   6.957314e-07
## Spc24          0.500          0.009       5.212088e-23   4.872996e-51
## Cdc6           0.500          0.001       1.507596e-69   1.755461e-63
##       Sample_1_avg_log2FC Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj
## Pclaf            7.797064            0.5          0.005       2.118795e-63
## E2f1             7.000323            0.7          0.007       1.223792e-90
## Kntc1            7.335552            0.4          0.001       5.302770e-89
## Nuf2             4.493786            0.1          0.003       1.231862e-02
## Spc24            6.386756            0.4          0.004       8.628126e-47
## Cdc6             7.767022            0.3          0.001       3.108219e-59
##           max_pval minimump_p_val
## Pclaf 1.343420e-33  2.484333e-101
## E2f1  6.546889e-36   2.764694e-94
## Kntc1 4.302516e-16   1.197960e-92
## Nuf2  6.957314e-07   2.864631e-90
## Spc24 2.664260e-21   2.864631e-90
## Cdc6  3.035532e-31   4.147015e-81
FeaturePlot(merged_object, features = c("Pclaf", "E2f1", "Kntc1", "Nuf2", "Spc24", "Cdc6"), min.cutoff = 'q10', reduction = "umap")

Cluster 9

markers_c9 <- FindConservedMarkers(merged_object, ident.1 = 9, grouping.var = "orig.ident")
## Warning: Identity: 9 not present in group Sample_4. Skipping Sample_4
## Warning: Identity: 9 not present in group Sample_7. Skipping Sample_7
## Testing group Sample_8: (9) vs (4, 0, 7, 5, 2, 1, 3, 6, 8)
## Testing group Sample_3: (9) vs (5, 1, 2, 4, 3, 0, 7, 6, 8)
## Testing group Sample_2: (9) vs (1, 3, 2, 5, 0, 4, 6, 7, 8)
## Testing group Sample_6: (9) vs (0, 4, 1, 5, 2, 7, 3, 8, 6)
## Warning: Sample_1 has fewer than 3 cells in Identity: 9. Skipping Sample_1
head(markers_c9)
##        Sample_8_p_val Sample_8_avg_log2FC Sample_8_pct.1 Sample_8_pct.2
## Cd40     2.148713e-12            5.085492           0.25          0.003
## Clec4d  4.824541e-102           11.186172           0.75          0.000
## Sirpa   6.432115e-109            9.865326           1.00          0.002
## Pirb    4.824541e-102           10.333748           0.75          0.000
## Tgfbi    1.940361e-20            6.953467           0.50          0.008
## Il1b     3.176763e-69           11.295252           1.00          0.007
##        Sample_8_p_val_adj Sample_3_p_val Sample_3_avg_log2FC Sample_3_pct.1
## Cd40         3.804511e-08   1.805919e-80            8.067746          0.667
## Clec4d       8.542332e-98   3.078131e-16            7.245606          0.333
## Sirpa       1.138870e-104  9.216522e-180           12.181824          1.000
## Pirb         8.542332e-98  9.216522e-180           11.720400          1.000
## Tgfbi        3.435603e-16  2.831514e-135           12.333369          1.000
## Il1b         5.624776e-65  1.713555e-135           14.178525          1.000
##        Sample_3_pct.2 Sample_3_p_val_adj Sample_2_p_val Sample_2_avg_log2FC
## Cd40            0.001       3.197561e-76   1.211856e-22            7.263443
## Clec4d          0.004       5.450138e-12  1.854089e-193           13.258277
## Sirpa           0.000      1.631877e-175  1.498412e-145            9.568742
## Pirb            0.000      1.631877e-175   1.545385e-65            8.507640
## Tgfbi           0.001      5.013479e-131  9.069434e-146           10.084735
## Il1b            0.001      3.034020e-131  3.818238e-117           14.073994
##        Sample_2_pct.1 Sample_2_pct.2 Sample_2_p_val_adj Sample_6_p_val
## Cd40            0.333          0.002       2.145712e-18  5.576688e-195
## Clec4d          1.000          0.000      3.282850e-189   8.533422e-34
## Sirpa           1.000          0.001      2.653089e-141   4.782747e-66
## Pirb            0.667          0.002       2.736259e-61   5.235819e-23
## Tgfbi           1.000          0.001      1.605834e-141   5.235819e-23
## Il1b            1.000          0.002      6.760572e-113   2.567745e-53
##        Sample_6_avg_log2FC Sample_6_pct.1 Sample_6_pct.2 Sample_6_p_val_adj
## Cd40             12.564580          1.000          0.000      9.874084e-191
## Clec4d            8.557133          0.333          0.001       1.510928e-29
## Sirpa            10.429122          0.333          0.000       8.468331e-62
## Pirb              7.733398          0.333          0.002       9.270541e-19
## Tgfbi             8.048040          0.333          0.002       9.270541e-19
## Il1b              9.920172          0.667          0.003       4.546450e-49
##            max_pval minimump_p_val
## Cd40   2.148713e-12  2.230675e-194
## Clec4d 3.078131e-16  7.416356e-193
## Sirpa  4.782747e-66  3.686609e-179
## Pirb   5.235819e-23  3.686609e-179
## Tgfbi  1.940361e-20  3.627774e-145
## Il1b   2.567745e-53  6.854218e-135
FeaturePlot(merged_object, features = c("Cd40", "Clec4d", "Sirpa", "Pirb", "Tgfbi", "Il1b"), min.cutoff = 'q10', reduction = "umap")

Reference based cell type annotation using cell typist (refer to the celltypist_run.py script in the repo)

expression_matrix <- GetAssayData(merged_object, assay = "RNA", slot = "data")
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
dense_matrix <- as.matrix(expression_matrix)
expression_df <- as.data.frame(t(dense_matrix))
expression_df$cell_id <- rownames(expression_df)
expression_df <- expression_df[, c(ncol(expression_df), 1:(ncol(expression_df) - 1))]
write.csv(expression_df, file = "seurat_expression.csv", row.names = FALSE, quote = FALSE)

predictions <- read.csv("/Users/gerdalukosiute/Downloads/Thesis/analysis_mCRC/predicted_labels.csv")
head(predictions)
##           X predicted_labels over_clustering majority_voting
## 1  s1_14310          NK cell              64         NK cell
## 2  s1_38613          NK cell              72         NK cell
## 3  s1_44195          NK cell              11         NK cell
## 4  s1_93453          NK cell              20         NK cell
## 5 s1_157761          NK cell              55         NK cell
## 6 s1_214830          NK cell             123         NK cell
rownames(predictions) <- predictions$cell_id
merged_object <- AddMetaData(merged_object, metadata = predictions$predicted_label, col.name = "celltypist_labels")
table(merged_object@meta.data$celltypist_labels)
## 
##  Activated CD4+ T cell              Cd11c Mac              Cd206 Mac 
##                      9                      4                      1 
##                    CD4            CD8+ T cell            Clec4e mono 
##                     12                     45                      1 
##                     DC              EarlyGC_2             Enterocyte 
##                      1                      4                      2 
##                   ILC1 Inflammatory Monocytes          Naive B cells 
##                      8                      7                      8 
##             Neutrophil                NK cell               NKT cell 
##                      2                   5073                      5 
##                     TA 
##                      1

Subsetting for NK and ILC1 cells

Based on the celltypist_run.py and the feature plot below we deduce which cells are NK and ILC1.

FeaturePlot(merged_object, features = c("Cd3e", "Cd3d", "Tbx21", "Eomes", "Cd8b1", "Nkg7", "Gzmb"), min.cutoff = 'q10', reduction = "umap")

NK_cells_maybe <- subset(merged_object, subset = seurat_clusters %in% c(0, 1, 2, 3, 6))
ILC1_cells_maybe <- subset(merged_object, subset = seurat_clusters %in% c(4))

Finding markers for the respective subsets

#Finding markers for NK subset (upregulated)
NK_markers <- FindAllMarkers(NK_cells_maybe, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 6
head(NK_markers)
##                p_val avg_log2FC pct.1 pct.2     p_val_adj cluster   gene
## Gzmb    0.000000e+00  2.2340264 0.975 0.683  0.000000e+00       0   Gzmb
## Tmsb4x  0.000000e+00  0.9310255 1.000 1.000  0.000000e+00       0 Tmsb4x
## Gzma   2.687576e-305  1.0713978 0.999 0.989 4.758622e-301       0   Gzma
## Actb   1.683474e-295  1.0195255 1.000 0.999 2.980759e-291       0   Actb
## Prf1   4.909488e-277  1.5184646 0.984 0.803 8.692740e-273       0   Prf1
## Itgb2  8.710347e-233  1.2802615 0.974 0.775 1.542254e-228       0  Itgb2
#Finding markers for ILC subset (upregulated)
ILC1_markers <- FindMarkers(merged_object, ident.1 = 4, ident.2 = c(0, 1, 2, 3, 6), min.pct = 0.25, logfc.threshold = 0.25)
head(ILC1_markers)
##          p_val avg_log2FC pct.1 pct.2 p_val_adj
## Il7r         0   5.372583 0.763 0.041         0
## Inpp4b       0   4.433715 0.662 0.054         0
## Tmem176b     0   6.204424 0.595 0.012         0
## Trgc4        0   5.121718 0.573 0.021         0
## Cxcr6        0   6.593900 0.560 0.010         0
## Tmem176a     0   6.772655 0.508 0.006         0

Gene Ontology Enrichment

#Extracting gene names
NK_gene_list <- rownames(NK_markers[NK_markers$p_val_adj < 0.05 & NK_markers$avg_log2FC > 0.5, ])
ILC1_gene_list <- rownames(ILC1_markers[ILC1_markers$p_val_adj < 0.05 & ILC1_markers$avg_log2FC > 0.5, ])

#GO Enrichment
NK_GO <- enrichGO(gene = NK_gene_list, OrgDb = org.Mm.eg.db, keyType = "SYMBOL", ont = "BP", pAdjustMethod = "BH", pvalueCutoff = 0.05, readable = TRUE)
ILC1_GO <- enrichGO(gene = ILC1_gene_list, OrgDb = org.Mm.eg.db, keyType = "SYMBOL", ont = "BP", pAdjustMethod = "BH", pvalueCutoff = 0.05, readable = TRUE)

#Plots
dotplot(NK_GO, showCategory = 15, title = "NK Cell GO Enrichment")

dotplot(ILC1_GO, showCategory = 15, title = "ILC1 GO Enrichment")

Comparing Clusters (returned null)

#Comparing
compareClusterResult <- compareCluster(
  geneClusters = list(NK = NK_gene_list, ILC1 = ILC1_gene_list),
  fun = "enrichGO",
  OrgDb = org.Mm.eg.db,
  ont = "BP",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05
)
## Warning in compareCluster(geneClusters = list(NK = NK_gene_list, ILC1 =
## ILC1_gene_list), : No enrichment found in any of gene cluster, please check
## your input...

GSEA

#Markers for NK vs ILC1
NK_vs_ILC1_DEGs <- FindMarkers(
  merged_object, 
  ident.1 = c(0, 1, 2, 3, 6), 
  ident.2 = 4,                
  min.pct = 0.1, 
  logfc.threshold = 0
)

#Ranked gene list based on logFC
ranked_genes <- NK_vs_ILC1_DEGs$avg_log2FC
names(ranked_genes) <- rownames(NK_vs_ILC1_DEGs)

#Sort genes in decreasing order for GSEA
ranked_genes <- sort(ranked_genes, decreasing = TRUE)

#GSEA (to see the pathways enriched in both)
GSEA_result <- gseGO(
  geneList = ranked_genes, 
  OrgDb = org.Mm.eg.db, 
  keyType = "SYMBOL",
  ont = "BP",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05
)
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
#PLOT MISSING!!!!!!!!!!!!!!!

ILC1 KEGG enrichment

ILC1_gene_IDs <- mapIds(
  org.Mm.eg.db, 
  keys = ILC1_gene_list, 
  column = "ENTREZID", 
  keytype = "SYMBOL", 
  multiVals = "first"
)
## 'select()' returned 1:1 mapping between keys and columns
ILC1_gene_IDs <- ILC1_gene_IDs[!is.na(ILC1_gene_IDs)]


ILC1_KEGG <- enrichKEGG(
  gene = ILC1_gene_IDs,
  organism = "mmu",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05
)

dotplot(ILC1_KEGG, showCategory = 10, title = "ILC1 Cell Kegg Cytokine Pathways")

ILC1_cytokine_pathways <- ILC1_KEGG@result[
  grep("cytokine|TNF|JAK|STAT|interferon|TGF", ILC1_KEGG@result$Description, ignore.case = TRUE), ]

head(ILC1_cytokine_pathways$Description, 50)
## [1] "Cytokine-cytokine receptor interaction - Mus musculus (house mouse)"                       
## [2] "Viral protein interaction with cytokine and cytokine receptor - Mus musculus (house mouse)"
## [3] "JAK-STAT signaling pathway - Mus musculus (house mouse)"                                   
## [4] "TNF signaling pathway - Mus musculus (house mouse)"                                        
## [5] "Adipocytokine signaling pathway - Mus musculus (house mouse)"                              
## [6] "Prostate cancer - Mus musculus (house mouse)"                                              
## [7] "TGF-beta signaling pathway - Mus musculus (house mouse)"